aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/AliasAnalysis.cpp12
-rw-r--r--llvm/lib/Analysis/Analysis.cpp1
-rw-r--r--llvm/lib/Analysis/BasicAliasAnalysis.cpp3
-rw-r--r--llvm/lib/Analysis/CaptureTracking.cpp7
-rw-r--r--llvm/lib/Analysis/ConstantFolding.cpp21
-rw-r--r--llvm/lib/Analysis/CycleAnalysis.cpp77
-rw-r--r--llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp131
-rw-r--r--llvm/lib/Analysis/IVDescriptors.cpp2
-rw-r--r--llvm/lib/Analysis/InlineAdvisor.cpp11
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp112
-rw-r--r--llvm/lib/Analysis/LoopAccessAnalysis.cpp21
-rw-r--r--llvm/lib/Analysis/MLInlineAdvisor.cpp62
-rw-r--r--llvm/lib/Analysis/MemDerefPrinter.cpp8
-rw-r--r--llvm/lib/Analysis/MemoryBuiltins.cpp14
-rw-r--r--llvm/lib/Analysis/MemoryLocation.cpp81
-rw-r--r--llvm/lib/Analysis/ModelUnderTrainingRunner.cpp49
-rw-r--r--llvm/lib/Analysis/ModuleSummaryAnalysis.cpp18
-rw-r--r--llvm/lib/Analysis/NoInferenceModelRunner.cpp33
-rw-r--r--llvm/lib/Analysis/ReleaseModeModelRunner.cpp90
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp34
-rw-r--r--llvm/lib/Analysis/TargetLibraryInfo.cpp5
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp15
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp32
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp2
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp36
-rw-r--r--llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp6
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp25
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp35
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp8
-rw-r--r--llvm/lib/Bitcode/Writer/ValueEnumerator.cpp16
-rw-r--r--llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp9
-rw-r--r--llvm/lib/CodeGen/Analysis.cpp4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp22
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp29
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp25
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp10
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp11
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h19
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp59
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp39
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp3
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp4
-rw-r--r--llvm/lib/CodeGen/CalcSpillWeights.cpp22
-rw-r--r--llvm/lib/CodeGen/CodeGen.cpp2
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp11
-rw-r--r--llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp3
-rw-r--r--llvm/lib/CodeGen/DeadMachineInstructionElim.cpp6
-rw-r--r--llvm/lib/CodeGen/EarlyIfConversion.cpp20
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CallLowering.cpp36
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Combiner.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp4
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp9
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp8
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp1105
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp48
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp39
-rw-r--r--llvm/lib/CodeGen/ImplicitNullChecks.cpp2
-rw-r--r--llvm/lib/CodeGen/InlineSpiller.cpp4
-rw-r--r--llvm/lib/CodeGen/InterferenceCache.cpp4
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp4
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp16
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.cpp11
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.h5
-rw-r--r--llvm/lib/CodeGen/LiveRangeEdit.cpp16
-rw-r--r--llvm/lib/CodeGen/LiveVariables.cpp14
-rw-r--r--llvm/lib/CodeGen/LocalStackSlotAllocation.cpp6
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIRParser.cpp18
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineBasicBlock.cpp31
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp161
-rw-r--r--llvm/lib/CodeGen/MachineCombiner.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineCopyPropagation.cpp28
-rw-r--r--llvm/lib/CodeGen/MachineCycleAnalysis.cpp113
-rw-r--r--llvm/lib/CodeGen/MachineFunction.cpp32
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp32
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp73
-rw-r--r--llvm/lib/CodeGen/MachineSSAContext.cpp52
-rw-r--r--llvm/lib/CodeGen/MachineSSAUpdater.cpp27
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp21
-rw-r--r--llvm/lib/CodeGen/MachineTraceMetrics.cpp25
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp82
-rw-r--r--llvm/lib/CodeGen/PHIElimination.cpp4
-rw-r--r--llvm/lib/CodeGen/PostRASchedulerList.cpp17
-rw-r--r--llvm/lib/CodeGen/PrologEpilogInserter.cpp14
-rw-r--r--llvm/lib/CodeGen/RDFGraph.cpp4
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp121
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.h210
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.cpp211
-rw-r--r--llvm/lib/CodeGen/RegAllocPBQP.cpp4
-rw-r--r--llvm/lib/CodeGen/RegAllocScore.cpp124
-rw-r--r--llvm/lib/CodeGen/RegAllocScore.h80
-rw-r--r--llvm/lib/CodeGen/RegisterClassInfo.cpp3
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp8
-rw-r--r--llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp17
-rw-r--r--llvm/lib/CodeGen/SafeStack.cpp18
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.cpp7
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.h12
-rw-r--r--llvm/lib/CodeGen/ScheduleDAG.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp655
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp16
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp99
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp30
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp61
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp82
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp20
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp59
-rw-r--r--llvm/lib/CodeGen/ShadowStackGCLowering.cpp14
-rw-r--r--llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp10
-rw-r--r--llvm/lib/CodeGen/StackProtector.cpp21
-rw-r--r--llvm/lib/CodeGen/StackSlotColoring.cpp10
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp42
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp15
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp2
-rw-r--r--llvm/lib/CodeGen/TargetRegisterInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/UnreachableBlockElim.cpp21
-rw-r--r--llvm/lib/CodeGen/VLIWMachineScheduler.cpp1009
-rw-r--r--llvm/lib/CodeGen/ValueTypes.cpp4
-rw-r--r--llvm/lib/CodeGen/WinEHPrepare.cpp15
-rw-r--r--llvm/lib/CodeGen/XRayInstrumentation.cpp1
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinker.cpp46
-rw-r--r--llvm/lib/DWARFLinker/DWARFStreamer.cpp4
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFContext.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp15
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp11
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp6
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDie.cpp4
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp4
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp47
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp12
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp80
-rw-r--r--llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp4
-rw-r--r--llvm/lib/DebugInfo/MSF/MSFBuilder.cpp26
-rw-r--r--llvm/lib/DebugInfo/MSF/MSFError.cpp8
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp4
-rw-r--r--llvm/lib/DebugInfo/Symbolize/Symbolize.cpp10
-rw-r--r--llvm/lib/Debuginfod/Debuginfod.cpp183
-rw-r--r--llvm/lib/Debuginfod/HTTPClient.cpp216
-rw-r--r--llvm/lib/Demangle/DLangDemangle.cpp66
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp6
-rw-r--r--llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/MCJIT/MCJIT.h10
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Core.cpp30
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp18
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp44
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Layer.cpp57
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp6
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp12
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Mangling.cpp188
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp205
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp29
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp22
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp72
-rw-r--r--llvm/lib/IR/AsmWriter.cpp46
-rw-r--r--llvm/lib/IR/AttributeImpl.h3
-rw-r--r--llvm/lib/IR/Attributes.cpp69
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp134
-rw-r--r--llvm/lib/IR/BasicBlock.cpp4
-rw-r--r--llvm/lib/IR/ConstantFold.cpp51
-rw-r--r--llvm/lib/IR/Constants.cpp57
-rw-r--r--llvm/lib/IR/Core.cpp25
-rw-r--r--llvm/lib/IR/DIBuilder.cpp8
-rw-r--r--llvm/lib/IR/DataLayout.cpp129
-rw-r--r--llvm/lib/IR/Function.cpp17
-rw-r--r--llvm/lib/IR/Globals.cpp4
-rw-r--r--llvm/lib/IR/InlineAsm.cpp6
-rw-r--r--llvm/lib/IR/Instruction.cpp11
-rw-r--r--llvm/lib/IR/Instructions.cpp48
-rw-r--r--llvm/lib/IR/IntrinsicInst.cpp14
-rw-r--r--llvm/lib/IR/LLVMContextImpl.h7
-rw-r--r--llvm/lib/IR/LegacyPassManager.cpp12
-rw-r--r--llvm/lib/IR/Module.cpp18
-rw-r--r--llvm/lib/IR/ModuleSummaryIndex.cpp16
-rw-r--r--llvm/lib/IR/Operator.cpp5
-rw-r--r--llvm/lib/IR/SSAContext.cpp47
-rw-r--r--llvm/lib/IR/Value.cpp2
-rw-r--r--llvm/lib/IR/Verifier.cpp87
-rw-r--r--llvm/lib/LTO/LTO.cpp4
-rw-r--r--llvm/lib/LTO/LTOBackend.cpp3
-rw-r--r--llvm/lib/LTO/LTOCodeGenerator.cpp5
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp6
-rw-r--r--llvm/lib/LineEditor/LineEditor.cpp5
-rw-r--r--llvm/lib/Linker/IRMover.cpp4
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp20
-rw-r--r--llvm/lib/MC/MCAssembler.cpp3
-rw-r--r--llvm/lib/MC/MCInstrAnalysis.cpp2
-rw-r--r--llvm/lib/MC/MCMachOStreamer.cpp15
-rw-r--r--llvm/lib/MC/MCNullStreamer.cpp3
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp25
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp67
-rw-r--r--llvm/lib/MC/MCPseudoProbe.cpp4
-rw-r--r--llvm/lib/MC/MCStreamer.cpp72
-rw-r--r--llvm/lib/MC/MCWin64EH.cpp6
-rw-r--r--llvm/lib/MC/MachObjectWriter.cpp88
-rw-r--r--llvm/lib/MC/TargetRegistry.cpp8
-rw-r--r--llvm/lib/Object/ArchiveWriter.cpp2
-rw-r--r--llvm/lib/Object/ELF.cpp2
-rw-r--r--llvm/lib/Object/MachOObjectFile.cpp46
-rw-r--r--llvm/lib/Object/MachOUniversalWriter.cpp1
-rw-r--r--llvm/lib/ObjectYAML/COFFEmitter.cpp12
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp48
-rw-r--r--llvm/lib/ObjectYAML/XCOFFEmitter.cpp10
-rw-r--r--llvm/lib/ObjectYAML/YAML.cpp5
-rw-r--r--llvm/lib/Option/OptTable.cpp10
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp2
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp80
-rw-r--r--llvm/lib/Passes/PassRegistry.def4
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp140
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp25
-rw-r--r--llvm/lib/ProfileData/InstrProfCorrelator.cpp264
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp125
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp1
-rw-r--r--llvm/lib/ProfileData/ProfileSummaryBuilder.cpp2
-rw-r--r--llvm/lib/ProfileData/SampleProf.cpp121
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp115
-rw-r--r--llvm/lib/ProfileData/SampleProfWriter.cpp75
-rw-r--r--llvm/lib/Support/AArch64TargetParser.cpp2
-rw-r--r--llvm/lib/Support/Caching.cpp13
-rw-r--r--llvm/lib/Support/CommandLine.cpp33
-rw-r--r--llvm/lib/Support/Compression.cpp8
-rw-r--r--llvm/lib/Support/ConvertUTFWrapper.cpp4
-rw-r--r--llvm/lib/Support/DAGDeltaAlgorithm.cpp68
-rw-r--r--llvm/lib/Support/DeltaAlgorithm.cpp5
-rw-r--r--llvm/lib/Support/HTTPClient.cpp97
-rw-r--r--llvm/lib/Support/KnownBits.cpp18
-rw-r--r--llvm/lib/Support/MemoryBuffer.cpp9
-rw-r--r--llvm/lib/Support/NativeFormatting.cpp2
-rw-r--r--llvm/lib/Support/Path.cpp4
-rw-r--r--llvm/lib/Support/RISCVISAInfo.cpp247
-rw-r--r--llvm/lib/Support/ScopedPrinter.cpp10
-rw-r--r--llvm/lib/Support/Signals.cpp6
-rw-r--r--llvm/lib/Support/SourceMgr.cpp3
-rw-r--r--llvm/lib/Support/Statistic.cpp17
-rw-r--r--llvm/lib/Support/TargetParser.cpp15
-rw-r--r--llvm/lib/Support/ThreadPool.cpp24
-rw-r--r--llvm/lib/Support/Triple.cpp126
-rw-r--r--llvm/lib/Support/Unix/Path.inc13
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp6
-rw-r--r--llvm/lib/Support/YAMLParser.cpp4
-rw-r--r--llvm/lib/TableGen/StringMatcher.cpp13
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp8
-rw-r--r--llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp14
-rw-r--r--llvm/lib/Target/AArch64/AArch64Combine.td10
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandImm.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp8
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp11
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp222
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td64
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td95
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td44
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTagging.cpp17
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h7
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp15
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp74
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp7
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp5
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp38
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp4
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td100
-rw-r--r--llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp26
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp33
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombine.td28
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGISel.td2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp26
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp106
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp116
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp40
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp192
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp33
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp210
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp85
-rw-r--r--llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td8
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td10
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.cpp48
-rw-r--r--llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp42
-rw-r--r--llvm/lib/Target/AMDGPU/R600Packetizer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp73
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp156
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp153
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td8
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td116
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h7
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp53
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp59
-rw-r--r--llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SMInstructions.td21
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td106
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp231
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h31
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td12
-rw-r--r--llvm/lib/Target/ARM/A15SDOptimizer.cpp16
-rw-r--r--llvm/lib/Target/ARM/ARM.td5
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.cpp36
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp217
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h27
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMBranchTargets.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.cpp7
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp48
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp24
-rw-r--r--llvm/lib/Target/ARM/ARMFrameLowering.cpp94
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp221
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h1
-rw-r--r--llvm/lib/Target/ARM/ARMInstrMVE.td116
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td7
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td3
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp16
-rw-r--r--llvm/lib/Target/ARM/ARMMachineFunctionInfo.h2
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterInfo.td4
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h9
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp63
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp67
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp5
-rw-r--r--llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp2
-rw-r--r--llvm/lib/Target/ARM/MVETailPredication.cpp10
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp9
-rw-r--r--llvm/lib/Target/AVR/AVRFrameLowering.cpp4
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFPreserveDIType.cpp9
-rw-r--r--llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp91
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp13
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.h2
-rw-r--r--llvm/lib/Target/CSKY/CSKYCallingConv.td2
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.cpp2
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp89
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.cpp40
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td15
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.cpp288
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.h25
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.td241
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td165
-rw-r--r--llvm/lib/Target/CSKY/CSKYMCInstLower.cpp2
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp181
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.h12
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.td5
-rw-r--r--llvm/lib/Target/Hexagon/Hexagon.td55
-rw-r--r--llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp61
-rw-r--r--llvm/lib/Target/Hexagon/HexagonAsmPrinter.h16
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp22
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepArch.h34
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepArch.td2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepDecoders.inc1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepIICHVX.td1018
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepIICScalar.td768
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td1253
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td293
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepMappings.td1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenInsert.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrFormats.td7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp9
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp964
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMachineScheduler.h253
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPseudo.td11
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSchedule.td1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonScheduleV69.td40
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSubtarget.cpp72
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSubtarget.h27
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp9
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h5
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp38
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h1
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp18
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h4
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp32
-rw-r--r--llvm/lib/Target/M68k/M68kInstrControl.td16
-rw-r--r--llvm/lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--llvm/lib/Target/Mips/Mips16HardFloat.cpp6
-rw-r--r--llvm/lib/Target/Mips/MipsBranchExpansion.cpp52
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsInstrInfo.cpp49
-rw-r--r--llvm/lib/Target/Mips/MipsInstrInfo.h7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp12
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXPeephole.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp12
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp29
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td4
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/PPCBack2BackFusion.def1042
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp14
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp56
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.h4
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td1
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td17
-rw-r--r--llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.def2
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp103
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h16
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp124
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp5
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h10
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp4
-rw-r--r--llvm/lib/Target/RISCV/RISCV.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp71
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp277
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h12
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td70
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormatsV.td32
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp5
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.h5
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td52
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoD.td235
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoF.td285
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoM.td13
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td72
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td1229
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZb.td65
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td250
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp1
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket.td3
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSiFive7.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp91
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h11
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp32
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp10
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.td1
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp180
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.h9
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp165
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFormats.td10
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperators.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp57
-rw-r--r--llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp35
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp3
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp47
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h3
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp6
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp11
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.cpp2
-rw-r--r--llvm/lib/Target/VE/VVPInstrInfo.td35
-rw-r--r--llvm/lib/Target/VE/VVPInstrPatternsVec.td185
-rw-r--r--llvm/lib/Target/VE/VVPNodes.def32
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp12
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp13
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h2
-rw-r--r--llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp22
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp23
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp9
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h2
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp7
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h8
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp25
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h7
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISD.def1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp115
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td19
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td16
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp13
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp24
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp42
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp34
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86Operand.h6
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp24
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.h19
-rw-r--r--llvm/lib/Target/X86/X86CmovConversion.cpp2
-rw-r--r--llvm/lib/Target/X86/X86ExpandPseudo.cpp26
-rw-r--r--llvm/lib/Target/X86/X86FastTileConfig.cpp6
-rw-r--r--llvm/lib/Target/X86/X86FixupBWInsts.cpp10
-rw-r--r--llvm/lib/Target/X86/X86FloatingPoint.cpp2
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp23
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp483
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h10
-rw-r--r--llvm/lib/Target/X86/X86IndirectBranchTracking.cpp4
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td138
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td6
-rw-r--r--llvm/lib/Target/X86/X86InstrFoldTables.cpp106
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp107
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h3
-rw-r--r--llvm/lib/Target/X86/X86InstrMMX.td40
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp246
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.td10
-rw-r--r--llvm/lib/Target/X86/X86SchedBroadwell.td26
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td32
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td167
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td4
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td88
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td98
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td72
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBdVer2.td36
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td24
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td8
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td18
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver2.td18
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver3.td14
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h3
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp12
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp91
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h5
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp6
-rw-r--r--llvm/lib/Transforms/CFGuard/CFGuard.cpp8
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp33
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroSplit.cpp41
-rw-r--r--llvm/lib/Transforms/Coroutines/Coroutines.cpp1
-rw-r--r--llvm/lib/Transforms/IPO/ArgumentPromotion.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/Attributor.cpp6
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp38
-rw-r--r--llvm/lib/Transforms/IPO/FunctionAttrs.cpp208
-rw-r--r--llvm/lib/Transforms/IPO/FunctionSpecialization.cpp317
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp11
-rw-r--r--llvm/lib/Transforms/IPO/HotColdSplitting.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/LowerTypeTests.cpp7
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp6
-rw-r--r--llvm/lib/Transforms/IPO/SampleContextTracker.cpp21
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp172
-rw-r--r--llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp3
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp104
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp58
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp44
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h1
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp20
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp18
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp7
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp7
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp161
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp39
-rw-r--r--llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp25
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp61
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp13
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp25
-rw-r--r--llvm/lib/Transforms/Scalar/ConstantHoisting.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp58
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp187
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp49
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp44
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp28
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRerollPass.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp25
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp35
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp51
-rw-r--r--llvm/lib/Transforms/Scalar/SCCP.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/CodeLayout.cpp942
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/FunctionComparator.cpp16
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp31
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp101
-rw-r--r--llvm/lib/Transforms/Utils/MetaRenamer.cpp67
-rw-r--r--llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/SampleProfileInference.cpp385
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp293
-rw-r--r--llvm/lib/Transforms/Utils/ValueMapper.cpp6
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp89
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h38
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp463
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp633
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp9
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h118
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp6
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanSLP.cpp5
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp49
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.h21
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp26
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp6
625 files changed, 23355 insertions, 10841 deletions
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index d030f74481cf..49199060786c 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -249,11 +249,11 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
bool IsMustAlias = true;
ModRefInfo AllArgsMask = ModRefInfo::NoModRef;
if (doesAccessArgPointees(MRB)) {
- for (auto AI = Call->arg_begin(), AE = Call->arg_end(); AI != AE; ++AI) {
- const Value *Arg = *AI;
+ for (const auto &I : llvm::enumerate(Call->args())) {
+ const Value *Arg = I.value();
if (!Arg->getType()->isPointerTy())
continue;
- unsigned ArgIdx = std::distance(Call->arg_begin(), AI);
+ unsigned ArgIdx = I.index();
MemoryLocation ArgLoc =
MemoryLocation::getForArgument(Call, ArgIdx, TLI);
AliasResult ArgAlias = alias(ArgLoc, Loc, AAQI);
@@ -696,14 +696,16 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I,
case Instruction::AtomicRMW:
return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
case Instruction::Call:
- return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+ case Instruction::CallBr:
case Instruction::Invoke:
- return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+ return getModRefInfo((const CallBase *)I, Loc, AAQIP);
case Instruction::CatchPad:
return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
case Instruction::CatchRet:
return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
default:
+ assert(!I->mayReadOrWriteMemory() &&
+ "Unhandled memory access instruction!");
return ModRefInfo::NoModRef;
}
}
diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp
index db5167061509..177f38af13d8 100644
--- a/llvm/lib/Analysis/Analysis.cpp
+++ b/llvm/lib/Analysis/Analysis.cpp
@@ -35,6 +35,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGOnlyPrinterLegacyPassPass(Registry);
initializeCFLAndersAAWrapperPassPass(Registry);
initializeCFLSteensAAWrapperPassPass(Registry);
+ initializeCycleInfoWrapperPassPass(Registry);
initializeDependenceAnalysisWrapperPassPass(Registry);
initializeDelinearizationPass(Registry);
initializeDemandedBitsWrapperPassPass(Registry);
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 88b0f37b1d48..5f1bf2001d47 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1699,6 +1699,7 @@ AliasResult BasicAAResult::aliasCheckRecursive(
return Result;
} else if (const GEPOperator *GV2 = dyn_cast<GEPOperator>(V2)) {
AliasResult Result = aliasGEP(GV2, V2Size, V1, V1Size, O2, O1, AAQI);
+ Result.swap();
if (Result != AliasResult::MayAlias)
return Result;
}
@@ -1709,6 +1710,7 @@ AliasResult BasicAAResult::aliasCheckRecursive(
return Result;
} else if (const PHINode *PN = dyn_cast<PHINode>(V2)) {
AliasResult Result = aliasPHI(PN, V2Size, V1, V1Size, AAQI);
+ Result.swap();
if (Result != AliasResult::MayAlias)
return Result;
}
@@ -1719,6 +1721,7 @@ AliasResult BasicAAResult::aliasCheckRecursive(
return Result;
} else if (const SelectInst *S2 = dyn_cast<SelectInst>(V2)) {
AliasResult Result = aliasSelect(S2, V2Size, V1, V1Size, AAQI);
+ Result.swap();
if (Result != AliasResult::MayAlias)
return Result;
}
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp
index 8955658cb9e7..9b45f455be08 100644
--- a/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/llvm/lib/Analysis/CaptureTracking.cpp
@@ -346,13 +346,16 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
if (Tracker->captured(U))
return;
- // Not captured if only passed via 'nocapture' arguments. Note that
- // calling a function pointer does not in itself cause the pointer to
+ // Calling a function pointer does not in itself cause the pointer to
// be captured. This is a subtle point considering that (for example)
// the callee might return its own address. It is analogous to saying
// that loading a value from a pointer does not cause the pointer to be
// captured, even though the loaded value might be the pointer itself
// (think of self-referential objects).
+ if (Call->isCallee(U))
+ break;
+
+ // Not captured if only passed via 'nocapture' arguments.
if (Call->isDataOperand(U) &&
!Call->doesNotCapture(Call->getDataOperandNo(U))) {
// The parameter is not marked 'nocapture' - captured.
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 3ed3b8902343..922b38e92785 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -352,6 +352,9 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
const DataLayout &DL) {
do {
Type *SrcTy = C->getType();
+ if (SrcTy == DestTy)
+ return C;
+
TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
if (!TypeSize::isKnownGE(SrcSize, DestSize))
@@ -705,7 +708,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
// is all undef or zero, we know what it loads.
if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- if (GV->getInitializer()->isNullValue())
+ if (GV->getInitializer()->isNullValue() && !Ty->isX86_MMXTy() &&
+ !Ty->isX86_AMXTy())
return Constant::getNullValue(Ty);
if (isa<UndefValue>(GV->getInitializer()))
return UndefValue::get(Ty);
@@ -881,7 +885,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
InnermostGEP = GEP;
InBounds &= GEP->isInBounds();
- SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
+ SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));
// Do not try the incorporate the sub-GEP if some index is not a number.
bool AllConstantInt = true;
@@ -1774,15 +1778,8 @@ static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
// If the operation does not change exception status flags, it is safe
// to fold.
- if (St == APFloat::opStatus::opOK) {
- // When FP exceptions are not ignored, intrinsic call will not be
- // eliminated, because it is considered as having side effect. But we
- // know that its evaluation does not raise exceptions, so side effect
- // is absent. To allow removing the call, mark it as not accessing memory.
- if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
- CI->addFnAttr(Attribute::ReadNone);
+ if (St == APFloat::opStatus::opOK)
return true;
- }
// If evaluation raised FP exception, the result can depend on rounding
// mode. If the latter is unknown, folding is not possible.
@@ -2960,10 +2957,6 @@ static Constant *ConstantFoldFixedVectorCall(
if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
unsigned Lanes = FVTy->getNumElements();
uint64_t Limit = Op->getZExtValue();
- // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
- // sure we get the limit right in that case and set all relevant lanes.
- if (IntrinsicID == Intrinsic::arm_mve_vctp64)
- Limit *= 2;
SmallVector<Constant *, 16> NCs;
for (unsigned i = 0; i < Lanes; i++) {
diff --git a/llvm/lib/Analysis/CycleAnalysis.cpp b/llvm/lib/Analysis/CycleAnalysis.cpp
new file mode 100644
index 000000000000..09c7ee67e05c
--- /dev/null
+++ b/llvm/lib/Analysis/CycleAnalysis.cpp
@@ -0,0 +1,77 @@
+//===- CycleAnalysis.cpp - Compute CycleInfo for LLVM IR ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/ADT/GenericCycleImpl.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+template class llvm::GenericCycleInfo<SSAContext>;
+template class llvm::GenericCycle<SSAContext>;
+
+CycleInfo CycleAnalysis::run(Function &F, FunctionAnalysisManager &) {
+ CycleInfo CI;
+ CI.compute(F);
+ return CI;
+}
+
+AnalysisKey CycleAnalysis::Key;
+
+CycleInfoPrinterPass::CycleInfoPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+PreservedAnalyses CycleInfoPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "CycleInfo for function: " << F.getName() << "\n";
+ AM.getResult<CycleAnalysis>(F).print(OS);
+
+ return PreservedAnalyses::all();
+}
+
+//===----------------------------------------------------------------------===//
+// CycleInfoWrapperPass Implementation
+//===----------------------------------------------------------------------===//
+//
+// The implementation details of the wrapper pass that holds a CycleInfo
+// suitable for use with the legacy pass manager.
+//
+//===----------------------------------------------------------------------===//
+
+char CycleInfoWrapperPass::ID = 0;
+
+CycleInfoWrapperPass::CycleInfoWrapperPass() : FunctionPass(ID) {
+ initializeCycleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(CycleInfoWrapperPass, "cycles", "Cycle Info Analysis",
+ true, true)
+INITIALIZE_PASS_END(CycleInfoWrapperPass, "cycles", "Cycle Info Analysis", true,
+ true)
+
+void CycleInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+bool CycleInfoWrapperPass::runOnFunction(Function &Func) {
+ CI.clear();
+
+ F = &Func;
+ CI.compute(Func);
+ return false;
+}
+
+void CycleInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
+ OS << "CycleInfo for function: " << F->getName() << "\n";
+ CI.print(OS);
+}
+
+void CycleInfoWrapperPass::releaseMemory() {
+ CI.clear();
+ F = nullptr;
+}
diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index d87fa849d839..31b2dafa29b4 100644
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -16,6 +16,8 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
+#include "llvm/Analysis/ModelUnderTrainingRunner.h"
+#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
@@ -94,7 +96,6 @@ struct InlineEvent {
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
-class ModelUnderTrainingRunner;
class TrainingLogger final {
public:
TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
@@ -261,65 +262,21 @@ private:
const int64_t Mandatory;
};
-/// A pseudo model runner. We use it to store feature values when collecting
-/// logs for the default policy, but never ask it to 'run'.
-class NoInferenceModelRunner : public MLModelRunner {
-public:
- NoInferenceModelRunner(LLVMContext &Ctx)
- : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
- void setFeature(FeatureIndex Index, int64_t Value) override {
- Features[static_cast<int>(Index)] = Value;
- }
-
- int64_t getFeature(int Index) const override { return Features[Index]; }
- bool run() override {
- llvm_unreachable("We shouldn't call run on this model runner.");
- }
-
-private:
- InlineFeatures Features;
-};
-
-/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
-/// to dynamically load and evaluate a TF SavedModel
-/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
-/// sacrificed for ease of use while training.
-class ModelUnderTrainingRunner final : public MLModelRunner {
-public:
- ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);
-
- bool run() override;
+static const std::vector<TensorSpec> TrainingOnlyFeatures{
+ TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
+ TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
+ TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
+ TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
- // Disallows copy and assign.
- ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
- ModelUnderTrainingRunner &
- operator=(const ModelUnderTrainingRunner &) = delete;
-
- void setFeature(FeatureIndex Index, int64_t Value) override;
- int64_t getFeature(int Index) const override;
- bool isValid() const { return !!Evaluator; }
-
- const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
- return OutputSpecs;
- }
-
- const Optional<TFModelEvaluator::EvaluationResult> &
- lastEvaluationResult() const {
- return LastEvaluationResult;
- }
-
-private:
- std::unique_ptr<TFModelEvaluator> Evaluator;
- std::vector<LoggedFeatureSpec> OutputSpecs;
- Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
+static const std::vector<TensorSpec> getInputFeatures() {
+ std::vector<TensorSpec> InputSpecs;
+ for (size_t I = 0; I < NumberOfFeatures; ++I)
+ InputSpecs.push_back(
+ TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
+ append_range(InputSpecs, TrainingOnlyFeatures);
+ return InputSpecs;
+}
- // The training framework needs some additional features.
- const std::vector<TensorSpec> TrainingOnlyFeatures{
- TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
- TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
- TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
- TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
-};
} // namespace
TrainingLogger::TrainingLogger(StringRef LogFileName,
@@ -353,7 +310,7 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
const MLModelRunner &ModelRunner) {
size_t CurrentFeature = 0;
for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
- int64_t F = ModelRunner.getFeature(CurrentFeature);
+ int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature);
L->logInt64Value(CurrentFeature, &F);
}
@@ -433,7 +390,9 @@ DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
bool DefaultAdvice = GetDefaultAdvice(CB);
- auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
+ auto Recommendation =
+ IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
+ : DefaultAdvice;
return std::make_unique<LoggingMLInlineAdvice>(
/*Advisor=*/this,
/*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
@@ -458,49 +417,6 @@ size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
return Ret;
}
-ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
- const std::string &ModelPath)
- : MLModelRunner(Ctx) {
- std::vector<TensorSpec> InputSpecs;
- for (size_t I = 0; I < NumberOfFeatures; ++I)
- InputSpecs.push_back(
- TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
- append_range(InputSpecs, TrainingOnlyFeatures);
- if (auto MaybeOutSpecs =
- loadOutputSpecs(Ctx, DecisionName, ModelPath, TFOutputSpecOverride))
- OutputSpecs = std::move(*MaybeOutSpecs);
- else
- return;
-
- Evaluator = std::make_unique<TFModelEvaluator>(
- ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
- OutputSpecs.size());
- if (!Evaluator || !Evaluator->isValid()) {
- Ctx.emitError("Failed to create inliner saved model evaluator");
- Evaluator.reset();
- return;
- }
-}
-
-bool ModelUnderTrainingRunner::run() {
- LastEvaluationResult = Evaluator->evaluate();
- if (!LastEvaluationResult.hasValue()) {
- Ctx.emitError("Error evaluating model.");
- return false;
- }
- int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0);
- return static_cast<bool>(Decision);
-}
-
-int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
- return *Evaluator->getInput<int64_t>(Index);
-}
-
-void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
- size_t NumericIndex = static_cast<size_t>(Index);
- *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
-}
-
std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
Module &M, ModuleAnalysisManager &MAM,
std::function<bool(CallBase &)> GetDefaultAdvice) {
@@ -509,10 +425,13 @@ std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
ModelUnderTrainingRunner *MUTRPtr = nullptr;
bool IsDoingInference = false;
if (TFModelUnderTrainingPath.empty())
- Runner.reset(new NoInferenceModelRunner(Ctx));
+ Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
else {
- auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
- Ctx, TFModelUnderTrainingPath);
+ std::unique_ptr<ModelUnderTrainingRunner> MUTR;
+ if (auto MaybeOutputSpecs = loadOutputSpecs(
+ Ctx, DecisionName, TFModelUnderTrainingPath, TFOutputSpecOverride))
+ MUTR = std::make_unique<ModelUnderTrainingRunner>(
+ Ctx, TFModelUnderTrainingPath, getInputFeatures(), *MaybeOutputSpecs);
if (!MUTR || !MUTR->isValid()) {
Ctx.emitError("Could not load the policy model from the provided path");
return nullptr;
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index cfe910df4e91..f5fa6748d053 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -933,7 +933,7 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
/// This function returns the identity element (or neutral element) for
/// the operation K.
Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
- FastMathFlags FMF) {
+ FastMathFlags FMF) const {
switch (K) {
case RecurKind::Xor:
case RecurKind::Add:
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index 73d1eff1b968..140c88eb8b0d 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -40,6 +40,10 @@ static cl::opt<bool>
" callsites processed by inliner but decided"
" to be not inlined"));
+static cl::opt<bool> EnableInlineDeferral("inline-deferral", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable deferred inlining"));
+
// An integer used to limit the cost of inline deferral. The default negative
// number tells shouldBeDeferred to only take the secondary cost into account.
static cl::opt<int>
@@ -136,8 +140,9 @@ llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice(
return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
};
- return llvm::shouldInline(CB, GetInlineCost, ORE,
- Params.EnableDeferral.getValueOr(false));
+ return llvm::shouldInline(
+ CB, GetInlineCost, ORE,
+ Params.EnableDeferral.getValueOr(EnableInlineDeferral));
}
std::unique_ptr<InlineAdvice>
@@ -409,8 +414,6 @@ llvm::shouldInline(CallBase &CB,
<< "' in other contexts";
});
setInlineRemark(CB, "deferred");
- // IC does not bool() to false, so get an InlineCost that will.
- // This will not be inspected to make an error message.
return None;
}
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 22d2ce11cc90..4831b22b1d46 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2173,6 +2173,15 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
}
}
+ // ((X | Y) ^ X ) & ((X | Y) ^ Y) --> 0
+ // ((X | Y) ^ Y ) & ((X | Y) ^ X) --> 0
+ BinaryOperator *Or;
+ if (match(Op0, m_c_Xor(m_Value(X),
+ m_CombineAnd(m_BinOp(Or),
+ m_c_Or(m_Deferred(X), m_Value(Y))))) &&
+ match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y))))
+ return Constant::getNullValue(Op0->getType());
+
return nullptr;
}
@@ -2198,6 +2207,18 @@ static Value *simplifyOrLogic(Value *X, Value *Y) {
Value *A, *B;
+ // (A ^ B) | (A | B) --> A | B
+ // (A ^ B) | (B | A) --> B | A
+ if (match(X, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return Y;
+
+ // ~(A ^ B) | (A | B) --> -1
+ // ~(A ^ B) | (B | A) --> -1
+ if (match(X, m_Not(m_Xor(m_Value(A), m_Value(B)))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return ConstantInt::getAllOnesValue(Ty);
+
// (A & ~B) | (A ^ B) --> A ^ B
// (~B & A) | (A ^ B) --> A ^ B
// (A & ~B) | (B ^ A) --> B ^ A
@@ -2214,18 +2235,33 @@ static Value *simplifyOrLogic(Value *X, Value *Y) {
match(Y, m_c_And(m_Specific(A), m_Specific(B))))
return X;
- // (A ^ B) | (A | B) --> A | B
- // (A ^ B) | (B | A) --> B | A
- if (match(X, m_Xor(m_Value(A), m_Value(B))) &&
- match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
- return Y;
-
- // ~(A ^ B) | (A | B) --> -1
- // ~(A ^ B) | (B | A) --> -1
- if (match(X, m_Not(m_Xor(m_Value(A), m_Value(B)))) &&
- match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ // (~A | B) | (A ^ B) --> -1
+ // (~A | B) | (B ^ A) --> -1
+ // (B | ~A) | (A ^ B) --> -1
+ // (B | ~A) | (B ^ A) --> -1
+ if (match(X, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Y, m_c_Xor(m_Specific(A), m_Specific(B))))
return ConstantInt::getAllOnesValue(Ty);
+ // (~A & B) | ~(A | B) --> ~A
+ // (~A & B) | ~(B | A) --> ~A
+ // (B & ~A) | ~(A | B) --> ~A
+ // (B & ~A) | ~(B | A) --> ~A
+ Value *NotA;
+ if (match(X,
+ m_c_And(m_CombineAnd(m_Value(NotA), m_NotForbidUndef(m_Value(A))),
+ m_Value(B))) &&
+ match(Y, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return NotA;
+
+ // ~(A ^ B) | (A & B) --> ~(A & B)
+ // ~(A ^ B) | (B & A) --> ~(A & B)
+ Value *NotAB;
+ if (match(X, m_CombineAnd(m_NotForbidUndef(m_Xor(m_Value(A), m_Value(B))),
+ m_Value(NotAB))) &&
+ match(Y, m_c_And(m_Specific(A), m_Specific(B))))
+ return NotAB;
+
return nullptr;
}
@@ -2259,27 +2295,6 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or))
return V;
- Value *A, *B, *NotA;
-
- // (~A & B) | ~(A | B) --> ~A
- // (~A & B) | ~(B | A) --> ~A
- // (B & ~A) | ~(A | B) --> ~A
- // (B & ~A) | ~(B | A) --> ~A
- if (match(Op0, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))),
- m_Value(B))) &&
- match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
- return NotA;
-
- // Commute the 'or' operands.
- // ~(A | B) | (~A & B) --> ~A
- // ~(B | A) | (~A & B) --> ~A
- // ~(A | B) | (B & ~A) --> ~A
- // ~(B | A) | (B & ~A) --> ~A
- if (match(Op1, m_c_And(m_CombineAnd(m_Value(NotA), m_Not(m_Value(A))),
- m_Value(B))) &&
- match(Op0, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
- return NotA;
-
// Rotated -1 is still -1:
// (-1 << X) | (-1 >> (C - X)) --> -1
// (-1 >> X) | (-1 << (C - X)) --> -1
@@ -2335,6 +2350,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
}
// (A & C1)|(B & C2)
+ Value *A, *B;
const APInt *C1, *C2;
if (match(Op0, m_And(m_Value(A), m_APInt(C1))) &&
match(Op1, m_And(m_Value(B), m_APInt(C2)))) {
@@ -2696,9 +2712,17 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
if (!OpTy->isIntOrIntVectorTy(1))
return nullptr;
- // A boolean compared to true/false can be simplified in 14 out of the 20
- // (10 predicates * 2 constants) possible combinations. Cases not handled here
- // require a 'not' of the LHS, so those must be transformed in InstCombine.
+ // A boolean compared to true/false can be reduced in 14 out of the 20
+ // (10 predicates * 2 constants) possible combinations. The other
+ // 6 cases require a 'not' of the LHS.
+
+ auto ExtractNotLHS = [](Value *V) -> Value * {
+ Value *X;
+ if (match(V, m_Not(m_Value(X))))
+ return X;
+ return nullptr;
+ };
+
if (match(RHS, m_Zero())) {
switch (Pred) {
case CmpInst::ICMP_NE: // X != 0 -> X
@@ -2706,6 +2730,13 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
case CmpInst::ICMP_SLT: // X <s 0 -> X
return LHS;
+ case CmpInst::ICMP_EQ: // not(X) == 0 -> X != 0 -> X
+ case CmpInst::ICMP_ULE: // not(X) <=u 0 -> X >u 0 -> X
+ case CmpInst::ICMP_SGE: // not(X) >=s 0 -> X <s 0 -> X
+ if (Value *X = ExtractNotLHS(LHS))
+ return X;
+ break;
+
case CmpInst::ICMP_ULT: // X <u 0 -> false
case CmpInst::ICMP_SGT: // X >s 0 -> false
return getFalse(ITy);
@@ -2723,6 +2754,13 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
case CmpInst::ICMP_SLE: // X <=s -1 -> X
return LHS;
+ case CmpInst::ICMP_NE: // not(X) != 1 -> X == 1 -> X
+ case CmpInst::ICMP_ULT: // not(X) <=u 1 -> X >=u 1 -> X
+ case CmpInst::ICMP_SGT: // not(X) >s 1 -> X <=s -1 -> X
+ if (Value *X = ExtractNotLHS(LHS))
+ return X;
+ break;
+
case CmpInst::ICMP_UGT: // X >u 1 -> false
case CmpInst::ICMP_SLT: // X <s -1 -> false
return getFalse(ITy);
@@ -5887,9 +5925,9 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
auto Attr = Call->getFunction()->getFnAttribute(Attribute::VScaleRange);
if (!Attr.isValid())
return nullptr;
- unsigned VScaleMin, VScaleMax;
- std::tie(VScaleMin, VScaleMax) = Attr.getVScaleRangeArgs();
- if (VScaleMin == VScaleMax && VScaleMax != 0)
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+ Optional<unsigned> VScaleMax = Attr.getVScaleRangeMax();
+ if (VScaleMax && VScaleMin == VScaleMax)
return ConstantInt::get(F->getReturnType(), VScaleMin);
return nullptr;
}
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 19a24ac6a484..6444518dc70c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1568,11 +1568,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
+ bool HasSameSize =
+ DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
uint64_t Stride = std::abs(StrideAPtr);
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
- if (!isa<SCEVCouldNotCompute>(Dist) &&
- TypeByteSize == DL.getTypeAllocSize(BTy) &&
+ if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
isSafeDependenceDistance(DL, *(PSE.getSE()),
*(PSE.getBackedgeTakenCount()), *Dist, Stride,
TypeByteSize))
@@ -1587,7 +1588,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
int64_t Distance = Val.getSExtValue();
// Attempt to prove strided accesses independent.
- if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
+ if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
@@ -1598,7 +1599,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
(couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
- ATy != BTy)) {
+ !HasSameSize)) {
LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
return Dependence::ForwardButPreventsForwarding;
}
@@ -1608,21 +1609,19 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
}
// Write to the same location with the same size.
- // Could be improved to assert type sizes are the same (i32 == float, etc).
if (Val == 0) {
- if (ATy == BTy)
+ if (HasSameSize)
return Dependence::Forward;
LLVM_DEBUG(
- dbgs() << "LAA: Zero dependence difference but different types\n");
+ dbgs() << "LAA: Zero dependence difference but different type sizes\n");
return Dependence::Unknown;
}
assert(Val.isStrictlyPositive() && "Expect a positive value");
- if (ATy != BTy) {
- LLVM_DEBUG(
- dbgs()
- << "LAA: ReadWrite-Write positive dependency with different types\n");
+ if (!HasSameSize) {
+ LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
+ "different type sizes\n");
return Dependence::Unknown;
}
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 6fc4c42bdd71..f5a65cd2b689 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -35,6 +35,21 @@
using namespace llvm;
+#ifdef LLVM_HAVE_TF_AOT
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
+// codegen-ed file
+#include "InlinerSizeModel.h" // NOLINT
+#include "llvm/Analysis/InlineModelFeatureMaps.h"
+
+std::unique_ptr<InlineAdvisor>
+llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
+ auto AOTRunner =
+ std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
+ M.getContext(), FeatureNameMap, DecisionName);
+ return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
+}
+#endif
+
#define DEBUG_TYPE "inline-ml"
static cl::opt<float> SizeIncreaseThreshold(
@@ -245,29 +260,32 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);
- ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount,
- CalleeBefore.BasicBlockCount);
- ModelRunner->setFeature(FeatureIndex::CallSiteHeight,
- FunctionLevels[&Caller]);
- ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount);
- ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams);
- ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount);
- ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses);
- ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks,
- CallerBefore.BlocksReachedFromConditionalInstruction);
- ModelRunner->setFeature(FeatureIndex::CallerBasicBlockCount,
- CallerBefore.BasicBlockCount);
- ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks,
- CalleeBefore.BlocksReachedFromConditionalInstruction);
- ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses);
- ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate);
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
+ CalleeBefore.BasicBlockCount;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
+ FunctionLevels[&Caller];
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
+ CallerBefore.Uses;
+ *ModelRunner->getTensor<int64_t>(
+ FeatureIndex::CallerConditionallyExecutedBlocks) =
+ CallerBefore.BlocksReachedFromConditionalInstruction;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
+ CallerBefore.BasicBlockCount;
+ *ModelRunner->getTensor<int64_t>(
+ FeatureIndex::CalleeConditionallyExecutedBlocks) =
+ CalleeBefore.BlocksReachedFromConditionalInstruction;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
+ CalleeBefore.Uses;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;
// Add the cost features
for (size_t I = 0;
I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
- ModelRunner->setFeature(
- inlineCostFeatureToMlFeature(static_cast<InlineCostFeatureIndex>(I)),
- CostFeatures->at(I));
+ *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
+ static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
}
return getAdviceFromModel(CB, ORE);
@@ -276,7 +294,8 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
OptimizationRemarkEmitter &ORE) {
- return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run());
+ return std::make_unique<MLInlineAdvice>(
+ this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
@@ -302,7 +321,8 @@ void MLInlineAdvice::reportContextForRemark(
using namespace ore;
OR << NV("Callee", Callee->getName());
for (size_t I = 0; I < NumberOfFeatures; ++I)
- OR << NV(FeatureNameMap[I], getAdvisor()->getModelRunner().getFeature(I));
+ OR << NV(FeatureNameMap[I],
+ *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
OR << NV("ShouldInline", isInliningRecommended());
}
diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp
index 1b16e1a9bcb2..30937a2e4931 100644
--- a/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -59,8 +59,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(
- PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
+ MaybeAlign(LI->getAlign()), DL))
DerefAndAligned.insert(PO);
}
}
@@ -94,8 +94,8 @@ PreservedAnalyses MemDerefPrinterPass::run(Function &F,
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(
- PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
+ MaybeAlign(LI->getAlign()), DL))
DerefAndAligned.insert(PO);
}
}
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 4f2b5b34304d..ffdd7a2cfd4b 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -592,9 +592,9 @@ STATISTIC(ObjectVisitorArgument,
STATISTIC(ObjectVisitorLoad,
"Number of load instructions with unsolved size and offset");
-APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Alignment) {
+APInt ObjectSizeOffsetVisitor::align(APInt Size, MaybeAlign Alignment) {
if (Options.RoundToAlign && Alignment)
- return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align(Alignment)));
+ return APInt(IntTyBits, alignTo(Size.getZExtValue(), Alignment));
return Size;
}
@@ -669,7 +669,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
- return std::make_pair(align(Size, I.getAlignment()), Zero);
+ return std::make_pair(align(Size, I.getAlign()), Zero);
Value *ArraySize = I.getArraySize();
if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
@@ -679,8 +679,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
bool Overflow;
Size = Size.umul_ov(NumElems, Overflow);
- return Overflow ? unknown() : std::make_pair(align(Size, I.getAlignment()),
- Zero);
+ return Overflow ? unknown()
+ : std::make_pair(align(Size, I.getAlign()), Zero);
}
return unknown();
}
@@ -694,7 +694,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
}
APInt Size(IntTyBits, DL.getTypeAllocSize(MemoryTy));
- return std::make_pair(align(Size, A.getParamAlignment()), Zero);
+ return std::make_pair(align(Size, A.getParamAlign()), Zero);
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) {
@@ -800,7 +800,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
return unknown();
APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType()));
- return std::make_pair(align(Size, GV.getAlignment()), Zero);
+ return std::make_pair(align(Size, GV.getAlign()), Zero);
}
SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) {
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index 854ba83bd34a..a877b19df866 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -101,13 +101,8 @@ MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
}
MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
- auto Size = LocationSize::afterPointer();
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Size = LocationSize::precise(C->getValue().getZExtValue());
-
- // memcpy/memmove can have AA tags. For memcpy, they apply
- // to both the source and the destination.
- return MemoryLocation(MTI->getRawSource(), Size, MTI->getAAMetadata());
+ assert(MTI->getRawSource() == MTI->getArgOperand(1));
+ return getForArgument(MTI, 1, nullptr);
}
MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
@@ -119,13 +114,47 @@ MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
}
MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
- auto Size = LocationSize::afterPointer();
- if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
- Size = LocationSize::precise(C->getValue().getZExtValue());
+ assert(MI->getRawDest() == MI->getArgOperand(0));
+ return getForArgument(MI, 0, nullptr);
+}
+
+Optional<MemoryLocation>
+MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) {
+ if (!CB->onlyAccessesArgMemory())
+ return None;
+
+ if (CB->hasOperandBundles())
+ // TODO: remove implementation restriction
+ return None;
+
+ Value *UsedV = nullptr;
+ Optional<unsigned> UsedIdx;
+ for (unsigned i = 0; i < CB->arg_size(); i++) {
+ if (!CB->getArgOperand(i)->getType()->isPointerTy())
+ continue;
+ if (CB->onlyReadsMemory(i))
+ continue;
+ if (!UsedV) {
+ // First potentially writing parameter
+ UsedV = CB->getArgOperand(i);
+ UsedIdx = i;
+ continue;
+ }
+ UsedIdx = None;
+ if (UsedV != CB->getArgOperand(i))
+ // Can't describe writing to two distinct locations.
+ // TODO: This results in an inprecision when two values derived from the
+ // same object are passed as arguments to the same function.
+ return None;
+ }
+ if (!UsedV)
+ // We don't currently have a way to represent a "does not write" result
+ // and thus have to be conservative and return unknown.
+ return None;
- // memcpy/memmove can have AA tags. For memcpy, they apply
- // to both the source and the destination.
- return MemoryLocation(MI->getRawDest(), Size, MI->getAAMetadata());
+ if (UsedIdx)
+ return getForArgument(CB, *UsedIdx, &TLI);
+ return MemoryLocation::getBeforeOrAfter(UsedV, CB->getAAMetadata());
}
MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
@@ -145,6 +174,9 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
case Intrinsic::memcpy:
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memset_element_unordered_atomic:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
@@ -204,6 +236,10 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
II->getArgOperand(1)->getType())),
AATags);
}
+
+ assert(
+ !isa<AnyMemTransferInst>(II) &&
+ "all memory transfer intrinsics should be handled by the switch above");
}
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -213,6 +249,12 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
LibFunc F;
if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) {
switch (F) {
+ case LibFunc_strcpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ assert((ArgIdx == 0 || ArgIdx == 1) && "Invalid argument index for str function");
+ return MemoryLocation::getAfter(Arg, AATags);
+
case LibFunc_memset_chk: {
assert(ArgIdx == 0 && "Invalid argument index for memset_chk");
LocationSize Size = LocationSize::afterPointer();
@@ -236,10 +278,18 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
return MemoryLocation(Arg, Size, AATags);
}
case LibFunc_memset_pattern16:
+ case LibFunc_memset_pattern4:
+ case LibFunc_memset_pattern8:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
- if (ArgIdx == 1)
- return MemoryLocation(Arg, LocationSize::precise(16), AATags);
+ if (ArgIdx == 1) {
+ unsigned Size = 16;
+ if (F == LibFunc_memset_pattern4)
+ Size = 4;
+ else if (F == LibFunc_memset_pattern8)
+ Size = 8;
+ return MemoryLocation(Arg, LocationSize::precise(Size), AATags);
+ }
if (const ConstantInt *LenCI =
dyn_cast<ConstantInt>(Call->getArgOperand(2)))
return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
@@ -274,7 +324,6 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
break;
};
}
- // FIXME: Handle memset_pattern4 and memset_pattern8 also.
return MemoryLocation::getBeforeOrAfter(Call->getArgOperand(ArgIdx), AATags);
}
diff --git a/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
new file mode 100644
index 000000000000..941458f648bc
--- /dev/null
+++ b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
@@ -0,0 +1,49 @@
+//===- ModelUnderTrainingRunner.cpp - 'development' mode runner -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of a MLModelRunner for 'development' mode, i.e. evaluation
+// happens off a model that's provided from the command line and is interpreted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#if defined(LLVM_HAVE_TF_API)
+
+#include "llvm/Analysis/ModelUnderTrainingRunner.h"
+
+using namespace llvm;
+
+ModelUnderTrainingRunner::ModelUnderTrainingRunner(
+ LLVMContext &Ctx, const std::string &ModelPath,
+ const std::vector<TensorSpec> &InputSpecs,
+ const std::vector<LoggedFeatureSpec> &OutputSpecs)
+ : MLModelRunner(Ctx), OutputSpecs(OutputSpecs) {
+ Evaluator = std::make_unique<TFModelEvaluator>(
+ ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
+ OutputSpecs.size());
+ if (!Evaluator || !Evaluator->isValid()) {
+ Ctx.emitError("Failed to create inliner saved model evaluator");
+ Evaluator.reset();
+ return;
+ }
+}
+
+void *ModelUnderTrainingRunner::evaluateUntyped() {
+ LastEvaluationResult = Evaluator->evaluate();
+ if (!LastEvaluationResult.hasValue()) {
+ Ctx.emitError("Error evaluating model.");
+ return nullptr;
+ }
+ return LastEvaluationResult->getUntypedTensorValue(0);
+}
+
+void *ModelUnderTrainingRunner::getTensorUntyped(size_t Index) {
+ return Evaluator->getUntypedInput(Index);
+}
+
+#endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index d80814852e19..2880ca62a7f8 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -234,6 +234,18 @@ static bool isNonVolatileStore(const Instruction *I) {
return false;
}
+// Returns true if the function definition must be unreachable.
+//
+// Note if this helper function returns true, `F` is guaranteed
+// to be unreachable; if it returns false, `F` might still
+// be unreachable but not covered by this helper function.
+static bool mustBeUnreachableFunction(const Function &F) {
+ // A function must be unreachable if its entry block ends with an
+ // 'unreachable'.
+ assert(!F.isDeclaration());
+ return isa<UnreachableInst>(F.getEntryBlock().getTerminator());
+}
+
static void computeFunctionSummary(
ModuleSummaryIndex &Index, const Module &M, const Function &F,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT,
@@ -488,7 +500,8 @@ static void computeFunctionSummary(
// Don't try to import functions with noinline attribute.
F.getAttributes().hasFnAttr(Attribute::NoInline),
F.hasFnAttribute(Attribute::AlwaysInline),
- F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall};
+ F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall,
+ mustBeUnreachableFunction(F)};
std::vector<FunctionSummary::ParamAccess> ParamAccesses;
if (auto *SSI = GetSSICallback(F))
ParamAccesses = SSI->getParamAccesses(Index);
@@ -737,7 +750,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
F->hasFnAttribute(Attribute::AlwaysInline),
F->hasFnAttribute(Attribute::NoUnwind),
/* MayThrow */ true,
- /* HasUnknownCall */ true},
+ /* HasUnknownCall */ true,
+ /* MustBeUnreachable */ false},
/*EntryCount=*/0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
diff --git a/llvm/lib/Analysis/NoInferenceModelRunner.cpp b/llvm/lib/Analysis/NoInferenceModelRunner.cpp
new file mode 100644
index 000000000000..02ece6aa3900
--- /dev/null
+++ b/llvm/lib/Analysis/NoInferenceModelRunner.cpp
@@ -0,0 +1,33 @@
+//===- NoInferenceModelRunner.cpp - noop ML model runner ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A pseudo model runner. We use it to store feature values when collecting
+// logs for the default policy, in 'development' mode, but never ask it to
+// 'run'.
+//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#if defined(LLVM_HAVE_TF_API)
+
+#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+
+using namespace llvm;
+
+NoInferenceModelRunner::NoInferenceModelRunner(
+ LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs)
+ : MLModelRunner(Ctx) {
+ ValuesBuffer.reserve(Inputs.size());
+ for (const auto &TS : Inputs)
+ ValuesBuffer.push_back(std::make_unique<char[]>(TS.getElementCount() *
+ TS.getElementByteSize()));
+}
+
+void *NoInferenceModelRunner::getTensorUntyped(size_t Index) {
+ return ValuesBuffer[Index].get();
+}
+#endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/Analysis/ReleaseModeModelRunner.cpp b/llvm/lib/Analysis/ReleaseModeModelRunner.cpp
deleted file mode 100644
index d2bf95388066..000000000000
--- a/llvm/lib/Analysis/ReleaseModeModelRunner.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===- ReleaseModeModelRunner.cpp - Fast, precompiled model runner -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a model runner wrapping an AOT compiled ML model.
-// Only inference is supported.
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
-#if defined(LLVM_HAVE_TF_AOT)
-
-#include "llvm/Analysis/InlineModelFeatureMaps.h"
-#include "llvm/Analysis/MLInlineAdvisor.h"
-
-// codegen-ed file
-#include "InlinerSizeModel.h" // NOLINT
-
-#include <memory>
-#include <vector>
-
-using namespace llvm;
-namespace {
-
-const char FeedPrefix[] = "feed_";
-const char FetchPrefix[] = "fetch_";
-
-/// MLModelRunner - production mode implementation. It uses a AOT-compiled
-/// SavedModel for efficient execution.
-class ReleaseModeModelRunner final : public MLModelRunner {
-public:
- ReleaseModeModelRunner(LLVMContext &Ctx);
- virtual ~ReleaseModeModelRunner() = default;
-
- bool run() override;
-
- void setFeature(FeatureIndex Index, int64_t Value) override;
- int64_t getFeature(int Index) const override;
-
-private:
- std::vector<int32_t> FeatureIndices;
- int32_t ResultIndex = -1;
- std::unique_ptr<llvm::InlinerSizeModel> CompiledModel;
-};
-} // namespace
-
-ReleaseModeModelRunner::ReleaseModeModelRunner(LLVMContext &Ctx)
- : MLModelRunner(Ctx),
- CompiledModel(std::make_unique<llvm::InlinerSizeModel>()) {
- assert(CompiledModel && "The CompiledModel should be valid");
-
- FeatureIndices.resize(NumberOfFeatures);
-
- for (size_t I = 0; I < NumberOfFeatures; ++I) {
- const int Index =
- CompiledModel->LookupArgIndex(FeedPrefix + FeatureNameMap[I]);
- assert(Index >= 0 && "Cannot find Feature in inlining model");
- FeatureIndices[I] = Index;
- }
-
- ResultIndex =
- CompiledModel->LookupResultIndex(std::string(FetchPrefix) + DecisionName);
- assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model");
-}
-
-int64_t ReleaseModeModelRunner::getFeature(int Index) const {
- return *static_cast<int64_t *>(
- CompiledModel->arg_data(FeatureIndices[Index]));
-}
-
-void ReleaseModeModelRunner::setFeature(FeatureIndex Index, int64_t Value) {
- *static_cast<int64_t *>(CompiledModel->arg_data(
- FeatureIndices[static_cast<size_t>(Index)])) = Value;
-}
-
-bool ReleaseModeModelRunner::run() {
- CompiledModel->Run();
- return static_cast<bool>(
- *static_cast<int64_t *>(CompiledModel->result_data(ResultIndex)));
-}
-
-std::unique_ptr<InlineAdvisor>
-llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
- auto AOTRunner = std::make_unique<ReleaseModeModelRunner>(M.getContext());
- return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
-}
-#endif // defined(LLVM_HAVE_TF_AOT)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 7dc7f9904c70..0c3f32295ae1 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8829,11 +8829,10 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
for (auto &LS : reverse(ValuesAtScopes[V]))
if (LS.first == L) {
LS.second = C;
+ if (!isa<SCEVConstant>(C))
+ ValuesAtScopesUsers[C].push_back({L, V});
break;
}
-
- if (!isa<SCEVConstant>(C))
- ValuesAtScopesUsers[C].push_back({L, V});
return C;
}
@@ -13058,11 +13057,13 @@ void ScalarEvolution::verify() const {
Worklist.append(L->begin(), L->end());
}
for (auto &KV : ValueExprMap) {
+#ifndef NDEBUG
// Check for SCEV expressions referencing invalid/deleted loops.
if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
assert(ValidLoops.contains(AR->getLoop()) &&
"AddRec references invalid loop");
}
+#endif
// Check that the value is also part of the reverse map.
auto It = ExprValueMap.find(KV.second);
@@ -13122,7 +13123,7 @@ void ScalarEvolution::verify() const {
is_contained(It->second, std::make_pair(L, Value)))
continue;
dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
- << ValueAtScope << " missing in ValuesAtScopesUsers\n";
+ << *ValueAtScope << " missing in ValuesAtScopesUsers\n";
std::abort();
}
}
@@ -13139,7 +13140,7 @@ void ScalarEvolution::verify() const {
is_contained(It->second, std::make_pair(L, ValueAtScope)))
continue;
dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
- << ValueAtScope << " missing in ValuesAtScopes\n";
+ << *ValueAtScope << " missing in ValuesAtScopes\n";
std::abort();
}
}
@@ -13958,11 +13959,12 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
ExprsToRewrite.push_back(LHS);
}
};
- // Starting at the loop predecessor, climb up the predecessor chain, as long
- // as there are predecessors that can be found that have unique successors
- // leading to the original header.
+ // First, collect conditions from dominating branches. Starting at the loop
+ // predecessor, climb up the predecessor chain, as long as there are
+ // predecessors that can be found that have unique successors leading to the
+ // original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
- DenseMap<const SCEV *, const SCEV *> RewriteMap;
+ SmallVector<std::pair<Value *, bool>> Terms;
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), L->getHeader());
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
@@ -13972,10 +13974,20 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
continue;
- bool EnterIfTrue = LoopEntryPredicate->getSuccessor(0) == Pair.second;
+ Terms.emplace_back(LoopEntryPredicate->getCondition(),
+ LoopEntryPredicate->getSuccessor(0) == Pair.second);
+ }
+
+ // Now apply the information from the collected conditions to RewriteMap.
+ // Conditions are processed in reverse order, so the earliest conditions is
+ // processed first. This ensures the SCEVs with the shortest dependency chains
+ // are constructed first.
+ DenseMap<const SCEV *, const SCEV *> RewriteMap;
+ for (auto &E : reverse(Terms)) {
+ bool EnterIfTrue = E.second;
SmallVector<Value *, 8> Worklist;
SmallPtrSet<Value *, 8> Visited;
- Worklist.push_back(LoopEntryPredicate->getCondition());
+ Worklist.push_back(E.first);
while (!Worklist.empty()) {
Value *Cond = Worklist.pop_back_val();
if (!Visited.insert(Cond).second)
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 72fbd5ad3f68..02923c2c7eb1 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -238,9 +238,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// e.g., x86_64-pc-windows-msvc18.
bool hasPartialC99 = true;
if (T.isKnownWindowsMSVCEnvironment()) {
- unsigned Major, Minor, Micro;
- T.getEnvironmentVersion(Major, Minor, Micro);
- hasPartialC99 = (Major == 0 || Major >= 19);
+ VersionTuple Version = T.getEnvironmentVersion();
+ hasPartialC99 = (Version.getMajor() == 0 || Version.getMajor() >= 19);
}
// Latest targets support C89 math functions, in part.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5067f493f02d..6aa9a77391dc 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -982,10 +982,10 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
-bool TargetTransformInfo::areFunctionArgsABICompatible(
+bool TargetTransformInfo::areTypesABICompatible(
const Function *Caller, const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const {
- return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
+ const ArrayRef<Type *> &Types) const {
+ return TTIImpl->areTypesABICompatible(Caller, Callee, Types);
}
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
@@ -1072,8 +1072,13 @@ bool TargetTransformInfo::supportsScalableVectors() const {
return TTIImpl->supportsScalableVectors();
}
-bool TargetTransformInfo::hasActiveVectorLength() const {
- return TTIImpl->hasActiveVectorLength();
+bool TargetTransformInfo::enableScalableVectorization() const {
+ return TTIImpl->enableScalableVectorization();
+}
+
+bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const {
+ return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
}
InstructionCost
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 1c41c77a8cfb..fc378f97de0b 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1154,7 +1154,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// If the negate has an NSW flag we can assume the sign bit of the result
// will be 0 because that makes abs(INT_MIN) undefined.
if (match(RHS, m_Neg(m_Specific(LHS))) &&
- Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
+ Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RHS)))
Known.Zero.setSignBit();
}
@@ -1709,23 +1709,25 @@ static void computeKnownBitsFromOperator(const Operator *I,
!II->getFunction()->hasFnAttribute(Attribute::VScaleRange))
break;
- auto VScaleRange = II->getFunction()
- ->getFnAttribute(Attribute::VScaleRange)
- .getVScaleRangeArgs();
+ auto Attr = II->getFunction()->getFnAttribute(Attribute::VScaleRange);
+ Optional<unsigned> VScaleMax = Attr.getVScaleRangeMax();
- if (VScaleRange.second == 0)
+ if (!VScaleMax)
break;
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+
// If vscale min = max then we know the exact value at compile time
// and hence we know the exact bits.
- if (VScaleRange.first == VScaleRange.second) {
- Known.One = VScaleRange.first;
- Known.Zero = VScaleRange.first;
+ if (VScaleMin == VScaleMax) {
+ Known.One = VScaleMin;
+ Known.Zero = VScaleMin;
Known.Zero.flipAllBits();
break;
}
- unsigned FirstZeroHighBit = 32 - countLeadingZeros(VScaleRange.second);
+ unsigned FirstZeroHighBit =
+ 32 - countLeadingZeros(VScaleMax.getValue());
if (FirstZeroHighBit < BitWidth)
Known.Zero.setBitsFrom(FirstZeroHighBit);
@@ -4676,8 +4678,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
return isDereferenceableAndAlignedPointer(
- LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()),
- DL, CtxI, DT, TLI);
+ LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlign()), DL,
+ CtxI, DT, TLI);
}
case Instruction::Call: {
auto *CI = cast<const CallInst>(Inst);
@@ -4975,14 +4977,6 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
return true;
- // TODO: this should really be under the ConsiderFlags block, but currently
- // these are not dropped by dropPoisonGeneratingFlags
- if (const auto *FP = dyn_cast<FPMathOperator>(Op)) {
- auto FMF = FP->getFastMathFlags();
- if (FMF.noNaNs() || FMF.noInfs())
- return true;
- }
-
unsigned Opcode = Op->getOpcode();
// Check whether opcode is a poison/undef-generating operation
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 41fb0b9008be..e3bf41c9721b 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -733,6 +733,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(x);
KEYWORD(blockaddress);
KEYWORD(dso_local_equivalent);
+ KEYWORD(no_cfi);
// Metadata types.
KEYWORD(distinct);
@@ -773,6 +774,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noUnwind);
KEYWORD(mayThrow);
KEYWORD(hasUnknownCall);
+ KEYWORD(mustBeUnreachable);
KEYWORD(calls);
KEYWORD(callee);
KEYWORD(params);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 5feabd876e3a..35c615522fe2 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -152,28 +152,28 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
FnAttrs.removeAttribute(Attribute::Alignment);
}
- AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, FnAttrs);
Fn->setAttributes(AS);
} else if (CallInst *CI = dyn_cast<CallInst>(V)) {
AttributeList AS = CI->getAttributes();
AttrBuilder FnAttrs(AS.getFnAttrs());
AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, FnAttrs);
CI->setAttributes(AS);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
AttributeList AS = II->getAttributes();
AttrBuilder FnAttrs(AS.getFnAttrs());
AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, FnAttrs);
II->setAttributes(AS);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(V)) {
AttributeList AS = CBI->getAttributes();
AttrBuilder FnAttrs(AS.getFnAttrs());
AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, FnAttrs);
CBI->setAttributes(AS);
} else if (auto *GV = dyn_cast<GlobalVariable>(V)) {
AttrBuilder Attrs(GV->getAttributes());
@@ -1306,7 +1306,8 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B,
unsigned MinValue, MaxValue;
if (parseVScaleRangeArguments(MinValue, MaxValue))
return true;
- B.addVScaleRangeAttr(MinValue, MaxValue);
+ B.addVScaleRangeAttr(MinValue,
+ MaxValue > 0 ? MaxValue : Optional<unsigned>());
return false;
}
case Attribute::Dereferenceable: {
@@ -3287,6 +3288,20 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
return false;
}
+ case lltok::kw_no_cfi: {
+ // ValID ::= 'no_cfi' @foo
+ Lex.Lex();
+
+ if (parseValID(ID, PFS))
+ return true;
+
+ if (ID.Kind != ValID::t_GlobalID && ID.Kind != ValID::t_GlobalName)
+ return error(ID.Loc, "expected global value name in no_cfi");
+
+ ID.NoCFI = true;
+ return false;
+ }
+
case lltok::kw_trunc:
case lltok::kw_zext:
case lltok::kw_sext:
@@ -5267,9 +5282,13 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
}
case ValID::t_GlobalName:
V = getGlobalVal(ID.StrVal, Ty, ID.Loc);
+ if (V && ID.NoCFI)
+ V = NoCFIValue::get(cast<GlobalValue>(V));
return V == nullptr;
case ValID::t_GlobalID:
V = getGlobalVal(ID.UIntVal, Ty, ID.Loc);
+ if (V && ID.NoCFI)
+ V = NoCFIValue::get(cast<GlobalValue>(V));
return V == nullptr;
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
@@ -8533,6 +8552,7 @@ bool LLParser::parseFlag(unsigned &Val) {
/// [',' 'noUnwind' ':' Flag]? ')'
/// [',' 'mayThrow' ':' Flag]? ')'
/// [',' 'hasUnknownCall' ':' Flag]? ')'
+/// [',' 'mustBeUnreachable' ':' Flag]? ')'
bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
assert(Lex.getKind() == lltok::kw_funcFlags);
@@ -8599,6 +8619,12 @@ bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
return true;
FFlags.HasUnknownCall = Val;
break;
+ case lltok::kw_mustBeUnreachable:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
+ return true;
+ FFlags.MustBeUnreachable = Val;
+ break;
default:
return error(Lex.getLoc(), "expected function flag type");
}
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index cd1d872cc219..284e469a1d2f 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -57,11 +57,7 @@ bool MetadataVerifier::verifyArray(
auto &Array = Node.getArray();
if (Size && Array.size() != *Size)
return false;
- for (auto &Item : Array)
- if (!verifyNode(Item))
- return false;
-
- return true;
+ return llvm::all_of(Array, verifyNode);
}
bool MetadataVerifier::verifyEntry(
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index d7bcb0d7f575..a36b256c29b6 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -107,9 +107,9 @@ static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
// Check to see if we have a blockinfo record for this record, with a name.
if (const BitstreamBlockInfo::BlockInfo *Info =
BlockInfo.getBlockInfo(BlockID)) {
- for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
- if (Info->RecordNames[i].first == CodeID)
- return Info->RecordNames[i].second.c_str();
+ for (const std::pair<unsigned, std::string> &RN : Info->RecordNames)
+ if (RN.first == CodeID)
+ return RN.second.c_str();
}
if (CurStreamType != LLVMIRBitstream)
@@ -219,6 +219,7 @@ static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
STRINGIFY_CODE(CST_CODE, CE_UNOP)
STRINGIFY_CODE(CST_CODE, DSO_LOCAL_EQUIVALENT)
+ STRINGIFY_CODE(CST_CODE, NO_CFI_VALUE)
case bitc::CST_CODE_BLOCKADDRESS:
return "CST_CODE_BLOCKADDRESS";
STRINGIFY_CODE(CST_CODE, DATA)
@@ -646,16 +647,14 @@ void BitcodeAnalyzer::printStats(BCDumpOptions O,
// Emit per-block stats.
O.OS << "Per-block Summary:\n";
- for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
- E = BlockIDStats.end();
- I != E; ++I) {
- O.OS << " Block ID #" << I->first;
+ for (const auto &Stat : BlockIDStats) {
+ O.OS << " Block ID #" << Stat.first;
if (Optional<const char *> BlockName =
- GetBlockName(I->first, BlockInfo, CurStreamType))
+ GetBlockName(Stat.first, BlockInfo, CurStreamType))
O.OS << " (" << *BlockName << ")";
O.OS << ":\n";
- const PerBlockIDStats &Stats = I->second;
+ const PerBlockIDStats &Stats = Stat.second;
O.OS << " Num Instances: " << Stats.NumInstances << "\n";
O.OS << " Total Size: ";
printSize(O.OS, Stats.NumBits);
@@ -694,8 +693,8 @@ void BitcodeAnalyzer::printStats(BCDumpOptions O,
O.OS << "\tRecord Histogram:\n";
O.OS << "\t\t Count # Bits b/Rec % Abv Record Kind\n";
- for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
- const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
+ for (const auto &FreqPair : FreqPairs) {
+ const PerRecordStats &RecStats = Stats.CodeFreq[FreqPair.second];
O.OS << format("\t\t%7d %9lu", RecStats.NumInstances,
(unsigned long)RecStats.TotalBits);
@@ -714,10 +713,10 @@ void BitcodeAnalyzer::printStats(BCDumpOptions O,
O.OS << " ";
if (Optional<const char *> CodeName = GetCodeName(
- FreqPairs[i].second, I->first, BlockInfo, CurStreamType))
+ FreqPair.second, Stat.first, BlockInfo, CurStreamType))
O.OS << *CodeName << "\n";
else
- O.OS << "UnknownCode" << FreqPairs[i].second << "\n";
+ O.OS << "UnknownCode" << FreqPair.second << "\n";
}
O.OS << "\n";
}
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 993cb1de8c02..f5a878f8788a 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -488,6 +488,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
BitcodeReaderValueList ValueList;
Optional<MetadataLoader> MDLoader;
std::vector<Comdat *> ComdatList;
+ DenseSet<GlobalObject *> ImplicitComdatObjects;
SmallVector<Instruction *, 64> InstructionList;
std::vector<std::pair<GlobalVariable *, unsigned>> GlobalInits;
@@ -932,6 +933,7 @@ static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) {
Flags.NoUnwind = (RawFlags >> 6) & 0x1;
Flags.MayThrow = (RawFlags >> 7) & 0x1;
Flags.HasUnknownCall = (RawFlags >> 8) & 0x1;
+ Flags.MustBeUnreachable = (RawFlags >> 9) & 0x1;
return Flags;
}
@@ -2037,14 +2039,8 @@ Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
return error("Invalid value name");
V->setName(NameStr);
auto *GO = dyn_cast<GlobalObject>(V);
- if (GO) {
- if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
- if (TT.supportsCOMDAT())
- GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
- else
- GO->setComdat(nullptr);
- }
- }
+ if (GO && ImplicitComdatObjects.contains(GO) && TT.supportsCOMDAT())
+ GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
return V;
}
@@ -2942,6 +2938,19 @@ Error BitcodeReader::parseConstants() {
V = DSOLocalEquivalent::get(GV);
break;
}
+ case bitc::CST_CODE_NO_CFI_VALUE: {
+ if (Record.size() < 2)
+ return error("Invalid record");
+ Type *GVTy = getTypeByID(Record[0]);
+ if (!GVTy)
+ return error("Invalid record");
+ GlobalValue *GV = dyn_cast_or_null<GlobalValue>(
+ ValueList.getConstantFwdRef(Record[1], GVTy));
+ if (!GV)
+ return error("Invalid record");
+ V = NoCFIValue::get(GV);
+ break;
+ }
}
ValueList.assignValue(V, NextCstNo);
@@ -3292,7 +3301,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
NewGV->setComdat(ComdatList[ComdatID - 1]);
}
} else if (hasImplicitComdat(RawLinkage)) {
- NewGV->setComdat(reinterpret_cast<Comdat *>(1));
+ ImplicitComdatObjects.insert(NewGV);
}
if (Record.size() > 12) {
@@ -3426,7 +3435,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Func->setComdat(ComdatList[ComdatID - 1]);
}
} else if (hasImplicitComdat(RawLinkage)) {
- Func->setComdat(reinterpret_cast<Comdat *>(1));
+ ImplicitComdatObjects.insert(Func);
}
if (Record.size() > 13)
@@ -6733,10 +6742,10 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
// not have its own string table. A bitcode file may have multiple
// string tables if it was created by binary concatenation, for example
// with "llvm-cat -b".
- for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) {
- if (!I->Strtab.empty())
+ for (BitcodeModule &I : llvm::reverse(F.Mods)) {
+ if (!I.Strtab.empty())
break;
- I->Strtab = *Strtab;
+ I.Strtab = *Strtab;
}
// Similarly, the string table is used by every preceding symbol table;
// normally there will be just one unless the bitcode file was created
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index e2354c40844a..dc06bc10cf95 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -833,8 +833,7 @@ void ModuleBitcodeWriter::writeAttributeTable() {
Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
SmallVector<uint64_t, 64> Record;
- for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- AttributeList AL = Attrs[i];
+ for (const AttributeList &AL : Attrs) {
for (unsigned i : AL.indexes()) {
AttributeSet AS = AL.getAttributes(i);
if (AS.hasAttributes())
@@ -1067,6 +1066,7 @@ static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) {
RawFlags |= (Flags.NoUnwind << 6);
RawFlags |= (Flags.MayThrow << 7);
RawFlags |= (Flags.HasUnknownCall << 8);
+ RawFlags |= (Flags.MustBeUnreachable << 9);
return RawFlags;
}
@@ -2657,6 +2657,10 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
Code = bitc::CST_CODE_DSO_LOCAL_EQUIVALENT;
Record.push_back(VE.getTypeID(Equiv->getGlobalValue()->getType()));
Record.push_back(VE.getValueID(Equiv->getGlobalValue()));
+ } else if (const auto *NC = dyn_cast<NoCFIValue>(C)) {
+ Code = bitc::CST_CODE_NO_CFI_VALUE;
+ Record.push_back(VE.getTypeID(NC->getGlobalValue()->getType()));
+ Record.push_back(VE.getValueID(NC->getGlobalValue()));
} else {
#ifndef NDEBUG
C->dump();
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 07e0708e68c3..df4f1a1873d7 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -310,8 +310,7 @@ static UseListOrderStack predictUseListOrder(const Module &M) {
// We want to visit the functions backward now so we can list function-local
// constants in the last Function they're used in. Module-level constants
// have already been visited above.
- for (auto I = M.rbegin(), E = M.rend(); I != E; ++I) {
- const Function &F = *I;
+ for (const Function &F : llvm::reverse(M)) {
if (F.isDeclaration())
continue;
for (const BasicBlock &BB : F)
@@ -541,9 +540,8 @@ void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
const char *Name) const {
OS << "Map Name: " << Name << "\n";
OS << "Size: " << Map.size() << "\n";
- for (ValueMapType::const_iterator I = Map.begin(),
- E = Map.end(); I != E; ++I) {
- const Value *V = I->first;
+ for (const auto &I : Map) {
+ const Value *V = I.first;
if (V->hasName())
OS << "Value: " << V->getName();
else
@@ -569,10 +567,10 @@ void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
const char *Name) const {
OS << "Map Name: " << Name << "\n";
OS << "Size: " << Map.size() << "\n";
- for (auto I = Map.begin(), E = Map.end(); I != E; ++I) {
- const Metadata *MD = I->first;
- OS << "Metadata: slot = " << I->second.ID << "\n";
- OS << "Metadata: function = " << I->second.F << "\n";
+ for (const auto &I : Map) {
+ const Metadata *MD = I.first;
+ OS << "Metadata: slot = " << I.second.ID << "\n";
+ OS << "Metadata: function = " << I.second.F << "\n";
MD->print(OS);
OS << "\n";
}
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5984063627b0..5c64622c7245 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -561,8 +561,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
<< ":\n");
std::map<unsigned, BitVector> RenameRegisterMap;
unsigned SuperReg = 0;
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- unsigned Reg = Regs[i];
+ for (unsigned Reg : Regs) {
if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
SuperReg = Reg;
@@ -584,8 +583,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
// All group registers should be a subreg of SuperReg.
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- unsigned Reg = Regs[i];
+ for (unsigned Reg : Regs) {
if (Reg == SuperReg) continue;
bool IsSub = TRI->isSubRegister(SuperReg, Reg);
// FIXME: remove this once PR18663 has been properly fixed. For now,
@@ -646,8 +644,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// For each referenced group register (which must be a SuperReg or
// a subregister of SuperReg), find the corresponding subregister
// of NewSuperReg and make sure it is free to be renamed.
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- unsigned Reg = Regs[i];
+ for (unsigned Reg : Regs) {
unsigned NewReg = 0;
if (Reg == SuperReg) {
NewReg = NewSuperReg;
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index 7d8a73e12d3a..7e68e5e22879 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -712,8 +712,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// The manipulations performed when we're looking through an insertvalue or
// an extractvalue would happen at the front of the RetPath list, so since
// we have to copy it anyway it's more efficient to create a reversed copy.
- SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
- SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());
+ SmallVector<unsigned, 4> TmpRetPath(llvm::reverse(RetPath));
+ SmallVector<unsigned, 4> TmpCallPath(llvm::reverse(CallPath));
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 828cb760b82e..533f20535655 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -180,7 +180,7 @@ Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
Alignment = InAlign;
// If the GV has a specified alignment, take it into account.
- const MaybeAlign GVAlign(GV->getAlignment());
+ const MaybeAlign GVAlign(GV->getAlign());
if (!GVAlign)
return Alignment;
@@ -288,7 +288,11 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
const Triple &Target = TM.getTargetTriple();
- OutStreamer->emitVersionForTarget(Target, M.getSDKVersion());
+ Triple TVT(M.getDarwinTargetVariantTriple());
+ OutStreamer->emitVersionForTarget(
+ Target, M.getSDKVersion(),
+ M.getDarwinTargetVariantTriple().empty() ? nullptr : &TVT,
+ M.getDarwinTargetVariantSDKVersion());
// Allow the target to emit any magic that it wants at the start of the file.
emitStartOfAsmFile(M);
@@ -1856,6 +1860,17 @@ bool AsmPrinter::doFinalization(Module &M) {
continue;
OutStreamer->emitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference);
}
+ if (shouldEmitWeakSwiftAsyncExtendedFramePointerFlags()) {
+ auto SymbolName = "swift_async_extendedFramePointerFlags";
+ auto Global = M.getGlobalVariable(SymbolName);
+ if (!Global) {
+ auto Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ Global = new GlobalVariable(M, Int8PtrTy, false,
+ GlobalValue::ExternalWeakLinkage, nullptr,
+ SymbolName);
+ OutStreamer->emitSymbolAttribute(getSymbol(Global), MCSA_WeakReference);
+ }
+ }
}
// Print aliases in topological order, that is, for each alias a = b,
@@ -2502,6 +2517,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV))
return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM);
+ if (const NoCFIValue *NC = dyn_cast<NoCFIValue>(CV))
+ return MCSymbolRefExpr::create(getSymbol(NC->getGlobalValue()), Ctx);
+
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (!CE) {
llvm_unreachable("Unknown constant value to lower!");
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 85ff84484ced..d621108408f0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -611,8 +611,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
void CodeViewDebug::beginModule(Module *M) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
- if (!M->getNamedMetadata("llvm.dbg.cu") ||
- !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CUs || !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
return;
}
@@ -622,7 +622,6 @@ void CodeViewDebug::beginModule(Module *M) {
TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
// Get the current source language.
- NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
const MDNode *Node = *CUs->operands().begin();
const auto *CU = cast<DICompileUnit>(Node);
@@ -650,6 +649,7 @@ void CodeViewDebug::endModule() {
switchToDebugSectionForSymbol(nullptr);
MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitObjName();
emitCompilerInformation();
endCVSubsection(CompilerInfo);
@@ -785,6 +785,29 @@ void CodeViewDebug::emitTypeGlobalHashes() {
}
}
+void CodeViewDebug::emitObjName() {
+ MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_OBJNAME);
+
+ StringRef PathRef(Asm->TM.Options.ObjectFilenameForDebug);
+ llvm::SmallString<256> PathStore(PathRef);
+
+ if (PathRef.empty() || PathRef == "-") {
+ // Don't emit the filename if we're writing to stdout or to /dev/null.
+ PathRef = {};
+ } else {
+ llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true);
+ PathRef = PathStore;
+ }
+
+ OS.AddComment("Signature");
+ OS.emitIntValue(0, 4);
+
+ OS.AddComment("Object name");
+ emitNullTerminatedSymbolName(OS, PathRef);
+
+ endSymbolRecord(CompilerEnd);
+}
+
namespace {
struct Version {
int Part[4];
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 6f88e15ee8fe..d1fc3cdccb20 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -302,6 +302,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitTypeGlobalHashes();
+ void emitObjName();
+
void emitCompilerInformation();
void emitBuildInfo();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 0d2736178f0f..9b73f0ab2f05 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -779,7 +779,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
auto AddEntry = [&](const DbgValueLocEntry &Entry,
- DIExpressionCursor &Cursor) {
+ DIExpressionCursor &Cursor) {
if (Entry.isLocation()) {
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor,
Entry.getLoc().getReg()))
@@ -788,11 +788,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addUnsignedConstant(Entry.getInt());
} else if (Entry.isConstantFP()) {
+ // DwarfExpression does not support arguments wider than 64 bits
+ // (see PR52584).
+ // TODO: Consider chunking expressions containing overly wide
+ // arguments into separate pointer-sized fragment expressions.
APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt();
- DwarfExpr.addUnsignedConstant(RawBytes);
+ if (RawBytes.getBitWidth() > 64)
+ return false;
+ DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
} else if (Entry.isConstantInt()) {
APInt RawBytes = Entry.getConstantInt()->getValue();
- DwarfExpr.addUnsignedConstant(RawBytes);
+ if (RawBytes.getBitWidth() > 64)
+ return false;
+ DwarfExpr.addUnsignedConstant(RawBytes.getZExtValue());
} else if (Entry.isTargetIndexLocation()) {
TargetIndexLocation Loc = Entry.getTargetIndexLocation();
// TODO TargetIndexLocation is a target-independent. Currently only the
@@ -805,11 +813,12 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
return true;
};
- DwarfExpr.addExpression(
- std::move(Cursor),
- [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
- return AddEntry(DVal->getLocEntries()[Idx], Cursor);
- });
+ if (!DwarfExpr.addExpression(
+ std::move(Cursor),
+ [&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
+ return AddEntry(DVal->getLocEntries()[Idx], Cursor);
+ }))
+ return VariableDie;
// Now attach the location information to the DIE.
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 047676d4c11e..48134f1fd774 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1224,17 +1224,15 @@ void DwarfDebug::beginModule(Module *M) {
CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
}
- for (auto *Ty : CUNode->getEnumTypes()) {
- // The enum types array by design contains pointers to
- // MDNodes rather than DIRefs. Unique them here.
+ for (auto *Ty : CUNode->getEnumTypes())
CU.getOrCreateTypeDIE(cast<DIType>(Ty));
- }
+
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
- // There is no point in force-emitting a forward declaration.
- CU.getOrCreateTypeDIE(RT);
+ // There is no point in force-emitting a forward declaration.
+ CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 6409c39e7849..37407c98e75f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -463,15 +463,14 @@ static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
return true;
}
-void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
- unsigned FragmentOffsetInBits) {
+void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor) {
addExpression(std::move(ExprCursor),
[](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
llvm_unreachable("unhandled opcode found in expression");
});
}
-void DwarfExpression::addExpression(
+bool DwarfExpression::addExpression(
DIExpressionCursor &&ExprCursor,
llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg) {
// Entry values can currently only cover the initial register location,
@@ -496,7 +495,7 @@ void DwarfExpression::addExpression(
case dwarf::DW_OP_LLVM_arg:
if (!InsertArg(Op->getArg(0), ExprCursor)) {
LocationKind = Unknown;
- return;
+ return false;
}
break;
case dwarf::DW_OP_LLVM_fragment: {
@@ -527,7 +526,7 @@ void DwarfExpression::addExpression(
setSubRegisterPiece(0, 0);
// Reset the location description kind.
LocationKind = Unknown;
- return;
+ return true;
}
case dwarf::DW_OP_plus_uconst:
assert(!isRegisterLocation());
@@ -630,6 +629,8 @@ void DwarfExpression::addExpression(
if (isImplicitLocation() && !isParameterValue())
// Turn this into an implicit location description.
addStackValue();
+
+ return true;
}
/// add masking operations to stencil out a subregister.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 513e9072309e..e605fe2f7d39 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -340,16 +340,17 @@ public:
/// create one if necessary.
unsigned getOrCreateBaseType(unsigned BitSize, dwarf::TypeKind Encoding);
+ /// Emit all remaining operations in the DIExpressionCursor. The
+ /// cursor must not contain any DW_OP_LLVM_arg operations.
+ void addExpression(DIExpressionCursor &&Expr);
+
/// Emit all remaining operations in the DIExpressionCursor.
- ///
- /// \param FragmentOffsetInBits If this is one fragment out of multiple
- /// locations, this is the offset of the
- /// fragment inside the entire variable.
- void addExpression(DIExpressionCursor &&Expr,
- unsigned FragmentOffsetInBits = 0);
- void
- addExpression(DIExpressionCursor &&Expr,
- llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg);
+ /// DW_OP_LLVM_arg operations are resolved by calling (\p InsertArg).
+ //
+ /// \return false if any call to (\p InsertArg) returns false.
+ bool addExpression(
+ DIExpressionCursor &&Expr,
+ llvm::function_ref<bool(unsigned, DIExpressionCursor &)> InsertArg);
/// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
/// the fragment described by \c Expr.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 976e35905144..6b6d63f14f87 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -536,6 +536,18 @@ void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) {
}
}
+void DwarfUnit::addAccess(DIE &Die, DINode::DIFlags Flags) {
+ if ((Flags & DINode::FlagAccessibility) == DINode::FlagProtected)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPrivate)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else if ((Flags & DINode::FlagAccessibility) == DINode::FlagPublic)
+ addUInt(Die, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+}
+
DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
if (!Context || isa<DIFile>(Context))
return &getUnitDie();
@@ -842,13 +854,17 @@ void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) {
for (const Metadata *Annotation : Annotations->operands()) {
const MDNode *MD = cast<MDNode>(Annotation);
const MDString *Name = cast<MDString>(MD->getOperand(0));
-
- // Currently, only MDString is supported with btf_decl_tag attribute.
- const MDString *Value = cast<MDString>(MD->getOperand(1));
+ const auto &Value = MD->getOperand(1);
DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer);
addString(AnnotationDie, dwarf::DW_AT_name, Name->getString());
- addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString());
+ if (const auto *Data = dyn_cast<MDString>(Value))
+ addString(AnnotationDie, dwarf::DW_AT_const_value, Data->getString());
+ else if (const auto *Data = dyn_cast<ConstantAsMetadata>(Value))
+ addConstantValue(AnnotationDie, Data->getValue()->getUniqueInteger(),
+ /*Unsigned=*/true);
+ else
+ assert(false && "Unsupported annotation value type");
}
}
@@ -1007,6 +1023,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isForwardDecl())
addFlag(Buffer, dwarf::DW_AT_declaration);
+ // Add accessibility info if available.
+ addAccess(Buffer, CTy->getFlags());
+
// Add source line info if available.
if (!CTy->isForwardDecl())
addSourceLine(Buffer, CTy);
@@ -1308,15 +1327,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isNoReturn())
addFlag(SPDie, dwarf::DW_AT_noreturn);
- if (SP->isProtected())
- addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_protected);
- else if (SP->isPrivate())
- addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_private);
- else if (SP->isPublic())
- addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_public);
+ addAccess(SPDie, SP->getFlags());
if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
@@ -1666,16 +1677,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
}
}
- if (DT->isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_protected);
- else if (DT->isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_private);
- // Otherwise C++ member and base classes are considered public.
- else if (DT->isPublic())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_public);
+ addAccess(MemberDie, DT->getFlags());
+
if (DT->isVirtual())
addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
@@ -1717,15 +1720,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
// FIXME: We could omit private if the parent is a class_type, and
// public if the parent is something else.
- if (DT->isProtected())
- addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_protected);
- else if (DT->isPrivate())
- addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_private);
- else if (DT->isPublic())
- addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_public);
+ addAccess(StaticMemberDIE, DT->getFlags());
if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
addConstantValue(StaticMemberDIE, CI, Ty);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 8140279adaef..54b0079dd7ce 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -226,6 +226,9 @@ public:
/// Add thrown types.
void addThrownTypes(DIE &Die, DINodeArray ThrownTypes);
+ /// Add the accessibility attribute.
+ void addAccess(DIE &Die, DINode::DIFlags Flags);
+
/// Add a new type attribute to the specified entity.
///
/// This takes and attribute parameter because DW_AT_friend attributes are
diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index a9fb31d42679..3ade262d9af2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -112,16 +112,12 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
EmitCamlGlobal(M, AP, "frametable");
int NumDescriptors = 0;
- for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
- IE = Info.funcinfo_end();
- I != IE; ++I) {
- GCFunctionInfo &FI = **I;
- if (FI.getStrategy().getName() != getStrategy().getName())
+ for (std::unique_ptr<GCFunctionInfo> &FI :
+ llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) {
+ if (FI->getStrategy().getName() != getStrategy().getName())
// this function is managed by some other GC
continue;
- for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
- NumDescriptors++;
- }
+ NumDescriptors += FI->size();
}
if (NumDescriptors >= 1 << 16) {
@@ -131,35 +127,34 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(NumDescriptors);
AP.emitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
- for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
- IE = Info.funcinfo_end();
- I != IE; ++I) {
- GCFunctionInfo &FI = **I;
- if (FI.getStrategy().getName() != getStrategy().getName())
+ for (std::unique_ptr<GCFunctionInfo> &FI :
+ llvm::make_range(Info.funcinfo_begin(), Info.funcinfo_end())) {
+ if (FI->getStrategy().getName() != getStrategy().getName())
// this function is managed by some other GC
continue;
- uint64_t FrameSize = FI.getFrameSize();
+ uint64_t FrameSize = FI->getFrameSize();
if (FrameSize >= 1 << 16) {
// Very rude!
- report_fatal_error("Function '" + FI.getFunction().getName() +
+ report_fatal_error("Function '" + FI->getFunction().getName() +
"' is too large for the ocaml GC! "
"Frame size " +
Twine(FrameSize) +
">= 65536.\n"
"(" +
- Twine(reinterpret_cast<uintptr_t>(&FI)) + ")");
+ Twine(reinterpret_cast<uintptr_t>(FI.get())) + ")");
}
AP.OutStreamer->AddComment("live roots for " +
- Twine(FI.getFunction().getName()));
+ Twine(FI->getFunction().getName()));
AP.OutStreamer->AddBlankLine();
- for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
- size_t LiveCount = FI.live_size(J);
+ for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE;
+ ++J) {
+ size_t LiveCount = FI->live_size(J);
if (LiveCount >= 1 << 16) {
// Very rude!
- report_fatal_error("Function '" + FI.getFunction().getName() +
+ report_fatal_error("Function '" + FI->getFunction().getName() +
"' is too large for the ocaml GC! "
"Live root count " +
Twine(LiveCount) + " >= 65536.");
@@ -169,8 +164,8 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(FrameSize);
AP.emitInt16(LiveCount);
- for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
- KE = FI.live_end(J);
+ for (GCFunctionInfo::live_iterator K = FI->live_begin(J),
+ KE = FI->live_end(J);
K != KE; ++K) {
if (K->StackOffset >= 1 << 16) {
// Very rude!
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 9e6f1a537de3..bab187f46535 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -47,7 +47,6 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
InlinedAt = InlinedAt->getInlinedAt();
}
- SmallVector<InlineSite, 8> InlineStack(ReversedInlineStack.rbegin(),
- ReversedInlineStack.rend());
+ SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack));
Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
}
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 64dadc82b48b..0ff67f7ca00a 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1125,8 +1125,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// If this is a large problem, avoid visiting the same basic blocks multiple
// times.
if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
+ for (MergePotentialsElt &Elt : MergePotentials)
+ TriedMerging.insert(Elt.getBlock());
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength);
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 863a0e1e0b56..5f9982cd155d 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -15,13 +15,13 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/StackMaps.h"
#include <cassert>
#include <tuple>
@@ -35,7 +35,7 @@ void VirtRegAuxInfo::calculateSpillWeightsAndHints() {
MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
calculateSpillWeightAndHint(LIS.getInterval(Reg));
@@ -64,14 +64,14 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg,
if (Register::isVirtualRegister(HReg))
return Sub == HSub ? HReg : Register();
- const TargetRegisterClass *rc = MRI.getRegClass(Reg);
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
MCRegister CopiedPReg = HSub ? TRI.getSubReg(HReg, HSub) : HReg.asMCReg();
- if (rc->contains(CopiedPReg))
+ if (RC->contains(CopiedPReg))
return CopiedPReg;
// Check if reg:sub matches so that a super register could be hinted.
if (Sub)
- return TRI.getMatchingSuperReg(CopiedPReg, Sub, rc);
+ return TRI.getMatchingSuperReg(CopiedPReg, Sub, RC);
return 0;
}
@@ -80,8 +80,8 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg,
static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS,
const VirtRegMap &VRM,
const TargetInstrInfo &TII) {
- unsigned Reg = LI.reg();
- unsigned Original = VRM.getOriginal(Reg);
+ Register Reg = LI.reg();
+ Register Original = VRM.getOriginal(Reg);
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
const VNInfo *VNI = *I;
@@ -183,8 +183,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
bool ShouldUpdateLI = !IsLocalSplitArtifact;
if (IsLocalSplitArtifact) {
- MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*End);
- assert(localMBB == LIS.getMBBFromIndex(*Start) &&
+ MachineBasicBlock *LocalMBB = LIS.getMBBFromIndex(*End);
+ assert(LocalMBB == LIS.getMBBFromIndex(*Start) &&
"start and end are expected to be in the same basic block");
// Local split artifact will have 2 additional copy instructions and they
@@ -192,8 +192,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// localLI = COPY other
// ...
// other = COPY localLI
- TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
- TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB);
NumInstr += 2;
}
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index bbdd8aab502e..7c236a9785d8 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -68,6 +68,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCSEPass(Registry);
initializeMachineCombinerPass(Registry);
initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCycleInfoPrinterPassPass(Registry);
+ initializeMachineCycleInfoWrapperPassPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLICMPass(Registry);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index ac4180c4c3ab..747f4e4fdecc 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4831,9 +4831,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
TargetLowering::AsmOperandInfoVector TargetConstraints =
TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+ for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, SDValue());
@@ -5617,9 +5615,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
TargetLowering::AsmOperandInfoVector TargetConstraints =
TLI->ParseConstraints(*DL, TRI, *CS);
unsigned ArgNo = 0;
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+ for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
@@ -6856,8 +6852,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// Use reverse iterator because later select may use the value of the
// earlier select, and we need to propagate value through earlier select
// to get the PHI operand.
- for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
- SelectInst *SI = *It;
+ for (SelectInst *SI : llvm::reverse(ASI)) {
// The select itself is replaced with a PHI Node.
PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
PN->takeName(SI);
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 4e98d49206b5..901409ea9f8f 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -405,8 +405,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin,
const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &Forbid) {
ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned NewReg = Order[i];
+ for (unsigned NewReg : Order) {
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 0bb186a02416..5579152f1ce0 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -142,9 +142,9 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
if (isDead(&MI)) {
LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);
// It is possible that some DBG_VALUE instructions refer to this
- // instruction. They get marked as undef and will be deleted
- // in the live debug variable analysis.
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ // instruction. They will be deleted in the live debug variable
+ // analysis.
+ MI.eraseFromParent();
AnyChanges = true;
++NumDeletes;
continue;
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 90883212a275..0b5469b02637 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -210,9 +210,9 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// Check all instructions, except the terminators. It is assumed that
// terminators never have side effects or define any used register values.
- for (MachineBasicBlock::iterator I = MBB->begin(),
- E = MBB->getFirstTerminator(); I != E; ++I) {
- if (I->isDebugInstr())
+ for (MachineInstr &MI :
+ llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) {
+ if (MI.isDebugInstr())
continue;
if (++InstrCount > BlockInstrLimit && !Stress) {
@@ -222,28 +222,28 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
}
// There shouldn't normally be any phis in a single-predecessor block.
- if (I->isPHI()) {
- LLVM_DEBUG(dbgs() << "Can't hoist: " << *I);
+ if (MI.isPHI()) {
+ LLVM_DEBUG(dbgs() << "Can't hoist: " << MI);
return false;
}
// Don't speculate loads. Note that it may be possible and desirable to
// speculate GOT or constant pool loads that are guaranteed not to trap,
// but we don't support that for now.
- if (I->mayLoad()) {
- LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I);
+ if (MI.mayLoad()) {
+ LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI);
return false;
}
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
- if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) {
- LLVM_DEBUG(dbgs() << "Can't speculate: " << *I);
+ if (!MI.isSafeToMove(nullptr, DontMoveAcrossStore)) {
+ LLVM_DEBUG(dbgs() << "Can't speculate: " << MI);
return false;
}
// Check for any dependencies on Head instructions.
- if (!InstrDependenciesAllowIfConv(&(*I)))
+ if (!InstrDependenciesAllowIfConv(&MI))
return false;
}
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 17094a8e44f8..d061664e8c5d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -256,7 +256,7 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
LLT PartLLT = MRI.getType(SrcRegs[0]);
// Deal with v3s16 split into v2s16
- LLT LCMTy = getLCMType(LLTy, PartLLT);
+ LLT LCMTy = getCoverTy(LLTy, PartLLT);
if (LCMTy == LLTy) {
// Common case where no padding is needed.
assert(DstRegs.size() == 1);
@@ -267,21 +267,9 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
// widening the original value.
Register UnmergeSrcReg;
if (LCMTy != PartLLT) {
- // e.g. A <3 x s16> value was split to <2 x s16>
- // %register_value0:_(<2 x s16>)
- // %register_value1:_(<2 x s16>)
- // %undef:_(<2 x s16>) = G_IMPLICIT_DEF
- // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef
- // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat
- const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
- Register Undef = B.buildUndef(PartLLT).getReg(0);
-
- // Build vector of undefs.
- SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
-
- // Replace the first sources with the real registers.
- std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
- UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0);
+ assert(DstRegs.size() == 1);
+ return B.buildDeleteTrailingVectorElements(DstRegs[0],
+ B.buildMerge(LCMTy, SrcRegs));
} else {
// We don't need to widen anything if we're extracting a scalar which was
// promoted to a vector e.g. s8 -> v4s8 -> s8
@@ -298,6 +286,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
for (int I = DstRegs.size(); I != NumDst; ++I)
PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+ if (PadDstRegs.size() == 1)
+ return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
}
@@ -485,7 +475,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
MachineRegisterInfo &MRI = *B.getMRI();
LLT DstTy = MRI.getType(DstRegs[0]);
- LLT LCMTy = getLCMType(SrcTy, PartTy);
+ LLT LCMTy = getCoverTy(SrcTy, PartTy);
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
@@ -493,7 +483,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
Register UnmergeSrc = SrcReg;
- if (CoveringSize != SrcSize) {
+ if (!LCMTy.isVector() && CoveringSize != SrcSize) {
// For scalars, it's common to be able to use a simple extension.
if (SrcTy.isScalar() && DstTy.isScalar()) {
CoveringSize = alignTo(SrcSize, DstSize);
@@ -510,14 +500,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
}
}
- // Unmerge to the original registers and pad with dead defs.
- SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
- for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize;
- Size += DstSize) {
- UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
- }
+ if (LCMTy.isVector() && CoveringSize != SrcSize)
+ UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0);
- B.buildUnmerge(UnmergeResults, UnmergeSrc);
+ B.buildUnmerge(DstRegs, UnmergeSrc);
}
bool CallLowering::determineAndHandleAssignments(
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 381c6df5c97a..dd1ef74e8ad0 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -135,7 +135,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
// Erase dead insts before even adding to the list.
if (isTriviallyDead(CurMI, *MRI)) {
LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
- CurMI.eraseFromParentAndMarkDBGValuesForRemoval();
+ CurMI.eraseFromParent();
continue;
}
WorkList.deferred_insert(&CurMI);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 755b3b844570..f7a634dad61a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1551,8 +1551,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
// These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval();
- MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();
+ MatchInfo.Shift2->eraseFromParent();
+ MatchInfo.Logic->eraseFromParent();
MI.eraseFromParent();
}
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 87cc60d51bc2..6d415c9c7f90 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -338,9 +338,10 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
- assert(CI && "Instruction should be CmpInst");
- MIRBuilder.buildFCmp(Pred, Res, Op0, Op1,
- MachineInstr::copyFlagsFromInstruction(*CI));
+ uint16_t Flags = 0;
+ if (CI)
+ Flags = MachineInstr::copyFlagsFromInstruction(*CI);
+ MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
}
return true;
@@ -3502,7 +3503,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Get rid of the now empty basic block.
EntryBB->removeSuccessor(&NewEntryBB);
MF->remove(EntryBB);
- MF->DeleteMachineBasicBlock(EntryBB);
+ MF->deleteMachineBasicBlock(EntryBB);
assert(&MF->front() == &NewEntryBB &&
"New entry wasn't next in the list of basic block!");
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 9b2692486384..b10c9272a508 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -163,7 +163,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// If so, erase it.
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << "Is dead; erasing.\n");
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParent();
continue;
}
@@ -255,8 +255,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = nullptr;
if (!MRI.def_empty(VReg))
MI = &*MRI.def_instr_begin(VReg);
- else if (!MRI.use_empty(VReg))
+ else if (!MRI.use_empty(VReg)) {
MI = &*MRI.use_instr_begin(VReg);
+ // Debug value instruction is permitted to use undefined vregs.
+ if (MI->isDebugValue())
+ continue;
+ }
if (!MI)
continue;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e09cd26eb0c1..e8a8efd5dad4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -176,16 +176,18 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
+ // Perform irregular split. Leftover is last element of RegPieces.
if (MainTy.isVector()) {
- unsigned EltSize = MainTy.getScalarSizeInBits();
- if (LeftoverSize % EltSize != 0)
- return false;
- LeftoverTy = LLT::scalarOrVector(
- ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
- } else {
- LeftoverTy = LLT::scalar(LeftoverSize);
+ SmallVector<Register, 8> RegPieces;
+ extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
+ for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
+ VRegs.push_back(RegPieces[i]);
+ LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
+ LeftoverTy = MRI.getType(LeftoverRegs[0]);
+ return true;
}
+ LeftoverTy = LLT::scalar(LeftoverSize);
// For irregular sizes, extract the individual parts.
for (unsigned I = 0; I != NumParts; ++I) {
Register NewReg = MRI.createGenericVirtualRegister(MainTy);
@@ -203,6 +205,44 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
+void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
+ SmallVectorImpl<Register> &VRegs) {
+ LLT RegTy = MRI.getType(Reg);
+ assert(RegTy.isVector() && "Expected a vector type");
+
+ LLT EltTy = RegTy.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ unsigned RegNumElts = RegTy.getNumElements();
+ unsigned LeftoverNumElts = RegNumElts % NumElts;
+ unsigned NumNarrowTyPieces = RegNumElts / NumElts;
+
+ // Perfect split without leftover
+ if (LeftoverNumElts == 0)
+ return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
+
+ // Irregular split. Provide direct access to all elements for artifact
+ // combiner using unmerge to elements. Then build vectors with NumElts
+ // elements. Remaining element(s) will be (used to build vector) Leftover.
+ SmallVector<Register, 8> Elts;
+ extractParts(Reg, EltTy, RegNumElts, Elts);
+
+ unsigned Offset = 0;
+ // Requested sub-vectors of NarrowTy.
+ for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
+ VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ }
+
+ // Leftover element(s).
+ if (LeftoverNumElts == 1) {
+ VRegs.push_back(Elts[Offset]);
+ } else {
+ LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
+ ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
+ VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
+ }
+}
+
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
@@ -223,6 +263,15 @@ void LegalizerHelper::insertParts(Register DstReg,
return;
}
+ // Merge sub-vectors with different number of elements and insert into DstReg.
+ if (ResultTy.isVector()) {
+ assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
+ SmallVector<Register, 8> AllRegs;
+ for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
+ AllRegs.push_back(Reg);
+ return mergeMixedSubvectors(DstReg, AllRegs);
+ }
+
SmallVector<Register> GCDRegs;
LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
@@ -231,6 +280,30 @@ void LegalizerHelper::insertParts(Register DstReg,
buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
+void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
+ Register Reg) {
+ LLT Ty = MRI.getType(Reg);
+ SmallVector<Register, 8> RegElts;
+ extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
+ Elts.append(RegElts);
+}
+
+/// Merge \p PartRegs with different types into \p DstReg.
+void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
+ ArrayRef<Register> PartRegs) {
+ SmallVector<Register, 8> AllElts;
+ for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
+ appendVectorElts(AllElts, PartRegs[i]);
+
+ Register Leftover = PartRegs[PartRegs.size() - 1];
+ if (MRI.getType(Leftover).isScalar())
+ AllElts.push_back(Leftover);
+ else
+ appendVectorElts(AllElts, Leftover);
+
+ MIRBuilder.buildMerge(DstReg, AllElts);
+}
+
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
const MachineInstr &MI) {
@@ -916,8 +989,26 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
- case TargetOpcode::G_FREEZE:
- return reduceOperationWidth(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_FREEZE: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ // Should widen scalar first
+ if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
+ return UnableToLegalize;
+
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+ SmallVector<Register, 8> Parts;
+ for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
+ Parts.push_back(
+ MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_SADDO:
@@ -1372,37 +1463,17 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
+ Register Dst = MO.getReg();
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MO.setReg(DstExt);
+ MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
}
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
-
- LLT OldTy = MRI.getType(MO.getReg());
- unsigned OldElts = OldTy.getNumElements();
- unsigned NewElts = MoreTy.getNumElements();
-
- unsigned NumParts = NewElts / OldElts;
-
- // Use concat_vectors if the result is a multiple of the number of elements.
- if (NumParts * OldElts == NewElts) {
- SmallVector<Register, 8> Parts;
- Parts.push_back(MO.getReg());
-
- Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
- for (unsigned I = 1; I != NumParts; ++I)
- Parts.push_back(ImpDef);
-
- auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
- MO.setReg(Concat.getReg(0));
- return;
- }
-
- Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
- Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
- MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
- MO.setReg(MoreReg);
+ SmallVector<Register, 8> Regs;
+ MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
}
void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
@@ -3558,20 +3629,83 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}
-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
- MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT LCMTy = getLCMType(DstTy, NarrowTy);
+#ifndef NDEBUG
+/// Check that all vector operands have same number of elements. Other operands
+/// should be listed in NonVecOp.
+static bool hasSameNumEltsOnAllVectorOperands(
+ GenericMachineInstr &MI, MachineRegisterInfo &MRI,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ if (MI.getNumMemOperands() != 0)
+ return false;
- unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+ LLT VecTy = MRI.getType(MI.getReg(0));
+ if (!VecTy.isVector())
+ return false;
+ unsigned NumElts = VecTy.getNumElements();
- auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
- SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
+ for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ continue;
+ }
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
- MI.eraseFromParent();
- return Legalized;
+ LLT Ty = MRI.getType(Op.getReg());
+ if (!Ty.isVector()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ is_contained(NonVecOpIndices, OpIdx);
+ continue;
+ }
+
+ if (Ty.getNumElements() != NumElts)
+ return false;
+ }
+
+ return true;
+}
+#endif
+
+/// Fill \p DstOps with DstOps that have same number of elements combined as
+/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
+/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
+/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
+static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
+ unsigned NumElts) {
+ LLT LeftoverTy;
+ assert(Ty.isVector() && "Expected vector type");
+ LLT EltTy = Ty.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ int NumParts, NumLeftover;
+ std::tie(NumParts, NumLeftover) =
+ getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
+
+ assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
+ for (int i = 0; i < NumParts; ++i) {
+ DstOps.push_back(NarrowTy);
+ }
+
+ if (LeftoverTy.isValid()) {
+ assert(NumLeftover == 1 && "expected exactly one leftover");
+ DstOps.push_back(LeftoverTy);
+ }
+}
+
+/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
+/// made from \p Op depending on operand type.
+static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
+ MachineOperand &Op) {
+ for (unsigned i = 0; i < N; ++i) {
+ if (Op.isReg())
+ Ops.push_back(Op.getReg());
+ else if (Op.isImm())
+ Ops.push_back(Op.getImm());
+ else if (Op.isPredicate())
+ Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
+ else
+ llvm_unreachable("Unsupported type");
+ }
}
// Handle splitting vector operations which need to have the same number of
@@ -3588,335 +3722,116 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
// s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
- MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
- if (TypeIdx != 0)
- return UnableToLegalize;
+ GenericMachineInstr &MI, unsigned NumElts,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
+ "Non-compatible opcode or not specified non-vector operands");
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
- const LLT NarrowTy0 = NarrowTyArg;
- const Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT LeftoverTy0;
-
- // All of the operands need to have the same number of elements, so if we can
- // determine a type breakdown for the result type, we can for all of the
- // source types.
- int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
- if (NumParts < 0)
- return UnableToLegalize;
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
- SmallVector<MachineInstrBuilder, 4> NewInsts;
-
- SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
- SmallVector<Register, 4> PartRegs, LeftoverRegs;
-
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register SrcReg = MI.getOperand(I).getReg();
- LLT SrcTyI = MRI.getType(SrcReg);
- const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
- : ElementCount::getFixed(1);
- LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
- LLT LeftoverTyI;
-
- // Split this operand into the requested typed registers, and any leftover
- // required to reproduce the original type.
- if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
- LeftoverRegs))
- return UnableToLegalize;
-
- if (I == 1) {
- // For the first operand, create an instruction for each part and setup
- // the result.
- for (Register PartReg : PartRegs) {
- Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
- .addDef(PartDstReg)
- .addUse(PartReg));
- DstRegs.push_back(PartDstReg);
- }
+ // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
+ // Build instructions with DstOps to use instruction found by CSE directly.
+ // CSE copies found instruction into given vreg when building with vreg dest.
+ SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
+ // Output registers will be taken from created instructions.
+ SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
+ for (unsigned i = 0; i < NumDefs; ++i) {
+ makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
+ }
- for (Register LeftoverReg : LeftoverRegs) {
- Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
- NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
- .addDef(PartDstReg)
- .addUse(LeftoverReg));
- LeftoverDstRegs.push_back(PartDstReg);
- }
+ // Split vector input operands into sub-vectors with NumElts elts + Leftover.
+ // Operands listed in NonVecOpIndices will be used as is without splitting;
+ // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
+ // scalar condition (op 1), immediate in sext_inreg (op 2).
+ SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ ++UseIdx, ++UseNo) {
+ if (is_contained(NonVecOpIndices, UseIdx)) {
+ broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
+ MI.getOperand(UseIdx));
} else {
- assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
-
- // Add the newly created operand splits to the existing instructions. The
- // odd-sized pieces are ordered after the requested NarrowTyArg sized
- // pieces.
- unsigned InstCount = 0;
- for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
- NewInsts[InstCount++].addUse(PartRegs[J]);
- for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
- NewInsts[InstCount++].addUse(LeftoverRegs[J]);
+ SmallVector<Register, 8> SplitPieces;
+ extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
+ for (auto Reg : SplitPieces)
+ InputOpsPieces[UseNo].push_back(Reg);
}
-
- PartRegs.clear();
- LeftoverRegs.clear();
}
- // Insert the newly built operations and rebuild the result register.
- for (auto &MIB : NewInsts)
- MIRBuilder.insertInstr(MIB);
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
- insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
+ // Take i-th piece of each input operand split and build sub-vector/scalar
+ // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ SmallVector<DstOp, 2> Defs;
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ Defs.push_back(OutputOpsPieces[DstNo][i]);
- MI.eraseFromParent();
- return Legalized;
-}
+ SmallVector<SrcOp, 3> Uses;
+ for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
+ Uses.push_back(InputOpsPieces[InputNo][i]);
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
-
- LLT NarrowTy0 = NarrowTy;
- LLT NarrowTy1;
- unsigned NumParts;
-
- if (NarrowTy.isVector()) {
- // Uneven breakdown not handled.
- NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
- if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
- return UnableToLegalize;
-
- NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
- } else {
- NumParts = DstTy.getNumElements();
- NarrowTy1 = SrcTy.getElementType();
+ auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ OutputRegs[DstNo].push_back(I.getReg(DstNo));
}
- SmallVector<Register, 4> SrcRegs, DstRegs;
- extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
-
- for (unsigned I = 0; I < NumParts; ++I) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- MachineInstr *NewInst =
- MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
-
- NewInst->setFlags(MI.getFlags());
- DstRegs.push_back(DstReg);
+ // Merge small outputs into MI's output for each def operand.
+ if (NumLeftovers) {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
+ } else {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
}
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
MI.eraseFromParent();
return Legalized;
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(Src0Reg);
+LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
+ unsigned NumElts) {
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
- unsigned NumParts;
- LLT NarrowTy0, NarrowTy1;
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
- if (TypeIdx == 0) {
- unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
- unsigned OldElts = DstTy.getNumElements();
-
- NarrowTy0 = NarrowTy;
- NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
- NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
- SrcTy.getScalarSizeInBits())
- : SrcTy.getElementType();
-
- } else {
- unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
- unsigned OldElts = SrcTy.getNumElements();
+ SmallVector<DstOp, 8> OutputOpsPieces;
+ SmallVector<Register, 8> OutputRegs;
+ makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
- NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
- NarrowTy.getNumElements();
- NarrowTy0 =
- LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
- NarrowTy1 = NarrowTy;
+  // Instructions that perform the register split will be inserted in the basic
+  // block where the register is defined (given by the next operand).
+ SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ UseIdx += 2, ++UseNo) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
}
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (NarrowTy1.isVector() &&
- NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
- return UnableToLegalize;
-
- CmpInst::Predicate Pred
- = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ // Build PHIs with fewer elements.
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
+ MIRBuilder.setInsertPt(*MI.getParent(), MI);
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
+ Phi.addDef(
+ MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
+ OutputRegs.push_back(Phi.getReg(0));
- SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
- extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
-
- for (unsigned I = 0; I < NumParts; ++I) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- DstRegs.push_back(DstReg);
-
- if (MI.getOpcode() == TargetOpcode::G_ICMP)
- MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
- else {
- MachineInstr *NewCmp
- = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
- NewCmp->setFlags(MI.getFlags());
+ for (unsigned j = 0; j < NumInputs / 2; ++j) {
+ Phi.addUse(InputOpsPieces[j][i]);
+ Phi.add(MI.getOperand(1 + j * 2 + 1));
}
}
- if (NarrowTy1.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register CondReg = MI.getOperand(1).getReg();
-
- unsigned NumParts = 0;
- LLT NarrowTy0, NarrowTy1;
-
- LLT DstTy = MRI.getType(DstReg);
- LLT CondTy = MRI.getType(CondReg);
- unsigned Size = DstTy.getSizeInBits();
-
- assert(TypeIdx == 0 || CondTy.isVector());
-
- if (TypeIdx == 0) {
- NarrowTy0 = NarrowTy;
- NarrowTy1 = CondTy;
-
- unsigned NarrowSize = NarrowTy0.getSizeInBits();
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (Size % NarrowSize != 0)
- return UnableToLegalize;
-
- NumParts = Size / NarrowSize;
-
- // Need to break down the condition type
- if (CondTy.isVector()) {
- if (CondTy.getNumElements() == NumParts)
- NarrowTy1 = CondTy.getElementType();
- else
- NarrowTy1 =
- LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
- CondTy.getScalarSizeInBits());
- }
+ // Merge small outputs into MI's def.
+ if (NumLeftovers) {
+ mergeMixedSubvectors(MI.getReg(0), OutputRegs);
} else {
- NumParts = CondTy.getNumElements();
- if (NarrowTy.isVector()) {
- // TODO: Handle uneven breakdown.
- if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
- return UnableToLegalize;
-
- return UnableToLegalize;
- } else {
- NarrowTy0 = DstTy.getElementType();
- NarrowTy1 = NarrowTy;
- }
- }
-
- SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
- if (CondTy.isVector())
- extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
-
- extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
- extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
-
- for (unsigned i = 0; i < NumParts; ++i) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
- Src1Regs[i], Src2Regs[i]);
- DstRegs.push_back(DstReg);
- }
-
- if (NarrowTy0.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- const Register DstReg = MI.getOperand(0).getReg();
- LLT PhiTy = MRI.getType(DstReg);
- LLT LeftoverTy;
-
- // All of the operands need to have the same number of elements, so if we can
- // determine a type breakdown for the result type, we can for all of the
- // source types.
- int NumParts, NumLeftover;
- std::tie(NumParts, NumLeftover)
- = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
- if (NumParts < 0)
- return UnableToLegalize;
-
- SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
- SmallVector<MachineInstrBuilder, 4> NewInsts;
-
- const int TotalNumParts = NumParts + NumLeftover;
-
- // Insert the new phis in the result block first.
- for (int I = 0; I != TotalNumParts; ++I) {
- LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
- Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
- NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
- .addDef(PartDstReg));
- if (I < NumParts)
- DstRegs.push_back(PartDstReg);
- else
- LeftoverDstRegs.push_back(PartDstReg);
- }
-
- MachineBasicBlock *MBB = MI.getParent();
- MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
- insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
-
- SmallVector<Register, 4> PartRegs, LeftoverRegs;
-
- // Insert code to extract the incoming values in each predecessor block.
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
- PartRegs.clear();
- LeftoverRegs.clear();
-
- Register SrcReg = MI.getOperand(I).getReg();
- MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
-
- LLT Unused;
- if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
- LeftoverRegs))
- return UnableToLegalize;
-
- // Add the newly created operand splits to the existing instructions. The
- // odd-sized pieces are ordered after the requested NarrowTyArg sized
- // pieces.
- for (int J = 0; J != TotalNumParts; ++J) {
- MachineInstrBuilder MIB = NewInsts[J];
- MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
- MIB.addMBB(&OpMBB);
- }
+ MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
}
MI.eraseFromParent();
@@ -3927,27 +3842,36 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
const int NumDst = MI.getNumOperands() - 1;
const Register SrcReg = MI.getOperand(NumDst).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(SrcReg);
- // TODO: Create sequence of extracts.
- if (DstTy == NarrowTy)
+ if (TypeIdx != 1 || NarrowTy == DstTy)
return UnableToLegalize;
- LLT GCDTy = getGCDType(SrcTy, NarrowTy);
- if (DstTy == GCDTy) {
- // This would just be a copy of the same unmerge.
- // TODO: Create extracts, pad with undef and create intermediate merges.
+  // Requires compatible types. Otherwise SrcReg should have been defined by a
+  // merge-like instruction that would get artifact-combined. Most likely the
+  // instruction that defines SrcReg has to perform a more/fewer elements
+  // legalization compatible with NarrowTy.
+ assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+
+ if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
return UnableToLegalize;
- }
- auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+  // This is most likely DstTy (smaller than register size) packed in SrcTy
+  // (larger than register size) and since unmerge was not combined it will be
+  // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
+  // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
+
+ // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
+ //
+ // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
+ // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
+ // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
const int NumUnmerge = Unmerge->getNumOperands() - 1;
const int PartsPerUnmerge = NumDst / NumUnmerge;
@@ -3964,89 +3888,87 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register Result = MI.getOperand(0).getReg();
- Register Overflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
-
- LLT SrcTy = MRI.getType(LHS);
- if (!SrcTy.isVector())
+LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+  // Requires compatible types. Otherwise the user of DstReg did not perform an
+  // unmerge that should have been artifact-combined. Most likely the user of
+  // DstReg has to do a more/fewer elements legalization compatible with NarrowTy.
+ assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if (NarrowTy == SrcTy)
return UnableToLegalize;
- LLT ElementType = SrcTy.getElementType();
- LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
- const ElementCount NumResult = SrcTy.getElementCount();
- LLT GCDTy = getGCDType(SrcTy, NarrowTy);
+  // This attempts to lower part of an LCMTy merge/unmerge sequence, intended
+  // for old MIR tests only. Since the changes to more/fewer elements
+  // legalization it should no longer be possible to generate MIR like this
+  // from LLVM IR: the LCMTy approach was replaced by merge/unmerge to elements.
+ if (TypeIdx == 1) {
+ assert(SrcTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
+ return UnableToLegalize;
+ // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
+ //
+ // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
+ // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
+ // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
+ // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
+ // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
+ // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
- // Unmerge the operands to smaller parts of GCD type.
- auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
- auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
+ SmallVector<Register, 8> Elts;
+ LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
+ for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
+ auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
+ for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
+ Elts.push_back(Unmerge.getReg(j));
+ }
- const int NumOps = UnmergeLHS->getNumOperands() - 1;
- const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
- LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
- LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumNarrowTyElts = NarrowTy.getNumElements();
+ unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
+ for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
+ ++i, Offset += NumNarrowTyElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
+ NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ }
- // Perform the operation over unmerged parts.
- SmallVector<Register, 8> ResultParts;
- SmallVector<Register, 8> OverflowParts;
- for (int I = 0; I != NumOps; ++I) {
- Register Operand1 = UnmergeLHS->getOperand(I).getReg();
- Register Operand2 = UnmergeRHS->getOperand(I).getReg();
- auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
- {Operand1, Operand2});
- ResultParts.push_back(PartMul->getOperand(0).getReg());
- OverflowParts.push_back(PartMul->getOperand(1).getReg());
+ MIRBuilder.buildMerge(DstReg, NarrowTyElts);
+ MI.eraseFromParent();
+ return Legalized;
}
- LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
- LLT OverflowLCMTy =
- LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);
-
- // Recombine the pieces to the original result and overflow registers.
- buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
- buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
- MI.eraseFromParent();
- return Legalized;
-}
-
-// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
-// a vector
-//
-// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
-// undef as necessary.
-//
-// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
-// -> <2 x s16>
-//
-// %4:_(s16) = G_IMPLICIT_DEF
-// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
-// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
-// %7:_(<2 x s16>) = G_IMPLICIT_DEF
-// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
-// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
-
- // Break into a common type
- SmallVector<Register, 16> Parts;
- for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
- extractGCDType(Parts, GCDTy, MO.getReg());
+ assert(TypeIdx == 0 && "Bad type index");
+ if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
+ (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
+ return UnableToLegalize;
- // Build the requested new merge, padding with undef.
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
- TargetOpcode::G_ANYEXT);
+  // This is most likely SrcTy (smaller than register size) packed in DstTy
+  // (larger than register size) and since merge was not combined it will be
+  // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
+  // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
- // Pack into the original result register.
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+ // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
+ //
+ // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
+ // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
+ // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+ unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
+ unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
+ for (unsigned i = 0; i < NumParts; ++i) {
+ SmallVector<Register, 8> Sources;
+ for (unsigned j = 0; j < NumElts; ++j)
+ Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
+ NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
+ }
+ MIRBuilder.buildMerge(DstReg, NarrowTyElts);
MI.eraseFromParent();
return Legalized;
}
@@ -4218,163 +4140,14 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
- LLT NarrowTy) {
- assert(TypeIdx == 0 && "only one type index expected");
-
- const unsigned Opc = MI.getOpcode();
- const int NumDefOps = MI.getNumExplicitDefs();
- const int NumSrcOps = MI.getNumOperands() - NumDefOps;
- const unsigned Flags = MI.getFlags();
- const unsigned NarrowSize = NarrowTy.getSizeInBits();
- const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
-
- assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
- "result and 1-3 sources or 2 results and "
- "1-2 sources");
-
- SmallVector<Register, 2> DstRegs;
- for (int I = 0; I < NumDefOps; ++I)
- DstRegs.push_back(MI.getOperand(I).getReg());
-
- // First of all check whether we are narrowing (changing the element type)
- // or reducing the vector elements
- const LLT DstTy = MRI.getType(DstRegs[0]);
- const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
-
- SmallVector<Register, 8> ExtractedRegs[3];
- SmallVector<Register, 8> Parts;
-
- // Break down all the sources into NarrowTy pieces we can operate on. This may
- // involve creating merges to a wider type, padded with undef.
- for (int I = 0; I != NumSrcOps; ++I) {
- Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
- // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
- // For fewerElements, this is a smaller vector with the same element type.
- LLT OpNarrowTy;
- if (IsNarrow) {
- OpNarrowTy = NarrowScalarTy;
-
- // In case of narrowing, we need to cast vectors to scalars for this to
- // work properly
- // FIXME: Can we do without the bitcast here if we're narrowing?
- if (SrcTy.isVector()) {
- SrcTy = LLT::scalar(SrcTy.getSizeInBits());
- SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
- }
- } else {
- auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
- : ElementCount::getFixed(1);
- OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
- }
-
- LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
-
- // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
- buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
- TargetOpcode::G_ANYEXT);
- }
-
- SmallVector<Register, 8> ResultRegs[2];
-
- // Input operands for each sub-instruction.
- SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
-
- int NumParts = ExtractedRegs[0].size();
- const unsigned DstSize = DstTy.getSizeInBits();
- const LLT DstScalarTy = LLT::scalar(DstSize);
-
- // Narrowing needs to use scalar types
- LLT DstLCMTy, NarrowDstTy;
- if (IsNarrow) {
- DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
- NarrowDstTy = NarrowScalarTy;
- } else {
- DstLCMTy = getLCMType(DstTy, NarrowTy);
- NarrowDstTy = NarrowTy;
- }
-
- // We widened the source registers to satisfy merge/unmerge size
- // constraints. We'll have some extra fully undef parts.
- const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
-
- for (int I = 0; I != NumRealParts; ++I) {
- // Emit this instruction on each of the split pieces.
- for (int J = 0; J != NumSrcOps; ++J)
- InputRegs[J] = ExtractedRegs[J][I];
-
- MachineInstrBuilder Inst;
- if (NumDefOps == 1)
- Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
- else
- Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
- Flags);
-
- for (int J = 0; J != NumDefOps; ++J)
- ResultRegs[J].push_back(Inst.getReg(J));
- }
-
- // Fill out the widened result with undef instead of creating instructions
- // with undef inputs.
- int NumUndefParts = NumParts - NumRealParts;
- if (NumUndefParts != 0) {
- Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
- for (int I = 0; I != NumDefOps; ++I)
- ResultRegs[I].append(NumUndefParts, Undef);
- }
-
- // Extract the possibly padded result. Use a scratch register if we need to do
- // a final bitcast, otherwise use the original result register.
- Register MergeDstReg;
- for (int I = 0; I != NumDefOps; ++I) {
- if (IsNarrow && DstTy.isVector())
- MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
- else
- MergeDstReg = DstRegs[I];
-
- buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
-
- // Recast to vector if we narrowed a vector
- if (IsNarrow && DstTy.isVector())
- MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
- }
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- int64_t Imm = MI.getOperand(2).getImm();
-
- LLT DstTy = MRI.getType(DstReg);
-
- SmallVector<Register, 8> Parts;
- LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
-
- for (Register &R : Parts)
- R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
-
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
using namespace TargetOpcode;
+ GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
+ unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
switch (MI.getOpcode()) {
case G_IMPLICIT_DEF:
- return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
case G_TRUNC:
case G_AND:
case G_OR:
@@ -4439,10 +4212,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SSUBSAT:
case G_UADDSAT:
case G_USUBSAT:
- return reduceOperationWidth(MI, TypeIdx, NarrowTy);
case G_UMULO:
case G_SMULO:
- return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
case G_ASHR:
@@ -4454,7 +4225,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_CTTZ_ZERO_UNDEF:
case G_CTPOP:
case G_FCOPYSIGN:
- return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
case G_ZEXT:
case G_SEXT:
case G_ANYEXT:
@@ -4467,14 +4237,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
- return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
- return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
+    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
case G_SELECT:
- return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
case G_PHI:
- return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorPhi(GMI, NumElts);
case G_UNMERGE_VALUES:
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
case G_BUILD_VECTOR:
@@ -4491,7 +4263,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STORE:
return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
case G_SEXT_INREG:
- return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
GISEL_VECREDUCE_CASES_NONSEQ
return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
case G_SHUFFLE_VECTOR:
@@ -5053,6 +4825,15 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
@@ -5070,6 +4851,17 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FSHR:
+ case TargetOpcode::G_FSHL: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -5079,6 +4871,11 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
case TargetOpcode::G_INSERT:
case TargetOpcode::G_FREEZE:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_SEXT_INREG:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -5098,30 +4895,34 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_UNMERGE_VALUES: {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- int NumDst = MI.getNumOperands() - 1;
- moreElementsVectorSrc(MI, MoreTy, NumDst);
-
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (int I = 0; I != NumDst; ++I)
- MIB.addDef(MI.getOperand(I).getReg());
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return UnableToLegalize;
+ case TargetOpcode::G_PHI:
+ return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_BUILD_VECTOR: {
+ SmallVector<SrcOp, 8> Elts;
+ for (auto Op : MI.uses()) {
+ Elts.push_back(Op.getReg());
+ }
- int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
- for (int I = NumDst; I != NewNumDst; ++I)
- MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
+ Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
+ }
- MIB.addUse(MI.getOperand(NumDst).getReg());
+ MIRBuilder.buildDeleteTrailingVectorElements(
+ MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_PHI:
- return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_TRUNC: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
@@ -6778,6 +6579,24 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
LLT VecTy = MRI.getType(SrcVec);
LLT EltTy = VecTy.getElementType();
+ unsigned NumElts = VecTy.getNumElements();
+
+ int64_t IdxVal;
+  if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal < NumElts) {
+ SmallVector<Register, 8> SrcRegs;
+ extractParts(SrcVec, EltTy, NumElts, SrcRegs);
+
+ if (InsertVal) {
+ SrcRegs[IdxVal] = MI.getOperand(2).getReg();
+ MIRBuilder.buildMerge(DstReg, SrcRegs);
+ } else {
+ MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
if (!EltTy.isByteSized()) { // Not implemented.
LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
return UnableToLegalize;
@@ -6796,7 +6615,6 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
// if the index is out of bounds.
Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
- int64_t IdxVal;
if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
int64_t Offset = IdxVal * EltBytes;
PtrInfo = PtrInfo.getWithOffset(Offset);
@@ -6923,6 +6741,32 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);
+ // Extract sub-vector or one element
+ if (SrcTy.isVector()) {
+ unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
+ (Offset + DstSize <= SrcTy.getSizeInBits())) {
+ // Unmerge and allow access to each Src element for the artifact combiner.
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
+
+      // Take the element(s) we need to extract and copy them (merging if several).
+ SmallVector<Register, 8> SubVectorElts;
+ for (unsigned Idx = Offset / SrcEltSize;
+ Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
+ SubVectorElts.push_back(Unmerge.getReg(Idx));
+ }
+ if (SubVectorElts.size() == 1)
+ MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
+ else
+ MIRBuilder.buildMerge(Dst, SubVectorElts);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
if (DstTy.isScalar() &&
(SrcTy.isScalar() ||
(SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
@@ -6956,6 +6800,45 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
LLT DstTy = MRI.getType(Src);
LLT InsertTy = MRI.getType(InsertSrc);
+ // Insert sub-vector or one element
+ if (DstTy.isVector() && !InsertTy.isPointer()) {
+ LLT EltTy = DstTy.getElementType();
+ unsigned EltSize = EltTy.getSizeInBits();
+ unsigned InsertSize = InsertTy.getSizeInBits();
+
+ if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
+ (Offset + InsertSize <= DstTy.getSizeInBits())) {
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
+ SmallVector<Register, 8> DstElts;
+ unsigned Idx = 0;
+ // Elements from Src before insert start Offset
+ for (; Idx < Offset / EltSize; ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ // Replace elements in Src with elements from InsertSrc
+ if (InsertTy.getSizeInBits() > EltSize) {
+ auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
+ for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
+ ++Idx, ++i) {
+ DstElts.push_back(UnmergeInsertSrc.getReg(i));
+ }
+ } else {
+ DstElts.push_back(InsertSrc);
+ ++Idx;
+ }
+
+ // Remaining elements from Src after insert
+ for (; Idx < DstTy.getNumElements(); ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ MIRBuilder.buildMerge(Dst, DstElts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
if (InsertTy.isVector() ||
(DstTy.isVector() && DstTy.getElementType() != InsertTy))
return UnableToLegalize;
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 03dda806cb1e..de8dbd456901 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -554,12 +554,11 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
bool Changed = false;
// Walk through the block bottom-up, looking for merging candidates.
StoreMergeCandidate Candidate;
- for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) {
- MachineInstr &MI = *II;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
if (InstsToErase.contains(&MI))
continue;
- if (auto StoreMI = dyn_cast<GStore>(&*II)) {
+ if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
// We have a G_STORE. Add it to the candidate if it writes to an adjacent
// address.
if (!addStoreToCandidate(*StoreMI, Candidate)) {
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index fb5ed35c1f72..391251886fbb 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -215,6 +215,48 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res,
return buildPtrMask(Res, Op0, MaskReg);
}
+MachineInstrBuilder
+MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+ "Op0 has more elements");
+
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ SmallVector<Register, 8> Regs;
+ for (auto Op : Unmerge.getInstr()->defs())
+ Regs.push_back(Op.getReg());
+ Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
+ unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
+ for (unsigned i = 0; i < NumberOfPadElts; ++i)
+ Regs.push_back(Undef);
+ return buildMerge(Res, Regs);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
+ "Op0 has fewer elements");
+
+ SmallVector<Register, 8> Regs;
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
+ Regs.push_back(Unmerge.getReg(i));
+ return buildMerge(Res, Regs);
+}
+
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
@@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
const SrcOp &Op) {
unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
- SmallVector<Register, 8> TmpVec;
- for (unsigned I = 0; I != NumReg; ++I)
- TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res));
- return buildUnmerge(TmpVec, Op);
+ SmallVector<DstOp, 8> TmpVec(NumReg, Res);
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index b0b84763e922..4981a537dc7c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -923,6 +923,21 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
return LLT::scalar(LCMSize);
}
+LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) {
+ if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy ||
+ (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits()))
+ return getLCMType(OrigTy, TargetTy);
+
+ unsigned OrigTyNumElts = OrigTy.getNumElements();
+ unsigned TargetTyNumElts = TargetTy.getNumElements();
+ if (OrigTyNumElts % TargetTyNumElts == 0)
+ return OrigTy;
+
+ unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts);
+ return LLT::scalarOrVector(ElementCount::getFixed(NumElts),
+ OrigTy.getElementType());
+}
+
LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
const unsigned OrigSize = OrigTy.getSizeInBits();
const unsigned TargetSize = TargetTy.getSizeInBits();
@@ -1184,25 +1199,6 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
}
-/// These artifacts generally don't have any debug users because they don't
-/// directly originate from IR instructions, but instead usually from
-/// legalization. Avoiding checking for debug users improves compile time.
-/// Note that truncates or extends aren't included because they have IR
-/// counterparts which can have debug users after translation.
-static bool shouldSkipDbgValueFor(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case TargetOpcode::G_UNMERGE_VALUES:
- case TargetOpcode::G_MERGE_VALUES:
- case TargetOpcode::G_CONCAT_VECTORS:
- case TargetOpcode::G_BUILD_VECTOR:
- case TargetOpcode::G_EXTRACT:
- case TargetOpcode::G_INSERT:
- return true;
- default:
- return false;
- }
-}
-
void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
LostDebugLocObserver *LocObserver,
SmallInstListTy &DeadInstChain) {
@@ -1212,10 +1208,7 @@ void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
}
LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
DeadInstChain.remove(&MI);
- if (shouldSkipDbgValueFor(MI))
- MI.eraseFromParent();
- else
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParent();
if (LocObserver)
LocObserver->checkpoint(false);
}
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 0882ce366c9c..fc97938ccd3e 100644
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -242,7 +242,7 @@ bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
- assert(!llvm::any_of(MI->operands(), IsRegMask) &&
+ assert(llvm::none_of(MI->operands(), IsRegMask) &&
"Calls were filtered out above!");
auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); };
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index fc5ac45752ca..c975013db8c8 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -686,9 +686,7 @@ void InlineSpiller::reMaterializeAll() {
// Remove any values that were completely rematted.
for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
- for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I != E; ++I) {
- VNInfo *VNI = *I;
+ for (VNInfo *VNI : llvm::make_range(LI.vni_begin(), LI.vni_end())) {
if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp
index a56485cdbc67..3cab9e5734ee 100644
--- a/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -56,8 +56,8 @@ void InterferenceCache::init(MachineFunction *mf,
LIUArray = liuarray;
TRI = tri;
reinitPhysRegEntries();
- for (unsigned i = 0; i != CacheEntries; ++i)
- Entries[i].clear(mf, indexes, lis);
+ for (Entry &E : Entries)
+ E.clear(mf, indexes, lis);
}
InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) {
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index cf62b0e5d7e8..e97dcca201e8 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -1249,8 +1249,8 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
Optional<ValueIDNum> Result = None;
Optional<LocIdx> SpillLoc = None;
- for (unsigned int I = 0; I < CandidateSizes.size(); ++I) {
- unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0});
+ for (unsigned CS : CandidateSizes) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {CS, 0});
SpillLoc = MTracker->getSpillMLoc(SpillID);
ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
// If this value was defined in it's own position, then it was probably
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index a632d3d9ce76..b4dd41bbb810 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -492,10 +492,10 @@ private:
static VarLoc CreateCopyLoc(const VarLoc &OldVL, const MachineLoc &OldML,
Register NewReg) {
VarLoc VL = OldVL;
- for (size_t I = 0, E = VL.Locs.size(); I < E; ++I)
- if (VL.Locs[I] == OldML) {
- VL.Locs[I].Kind = MachineLocKind::RegisterKind;
- VL.Locs[I].Value.RegNo = NewReg;
+ for (MachineLoc &ML : VL.Locs)
+ if (ML == OldML) {
+ ML.Kind = MachineLocKind::RegisterKind;
+ ML.Value.RegNo = NewReg;
return VL;
}
llvm_unreachable("Should have found OldML in new VarLoc.");
@@ -506,10 +506,10 @@ private:
static VarLoc CreateSpillLoc(const VarLoc &OldVL, const MachineLoc &OldML,
unsigned SpillBase, StackOffset SpillOffset) {
VarLoc VL = OldVL;
- for (int I = 0, E = VL.Locs.size(); I < E; ++I)
- if (VL.Locs[I] == OldML) {
- VL.Locs[I].Kind = MachineLocKind::SpillLocKind;
- VL.Locs[I].Value.SpillLocation = {SpillBase, SpillOffset};
+ for (MachineLoc &ML : VL.Locs)
+ if (ML == OldML) {
+ ML.Kind = MachineLocKind::SpillLocKind;
+ ML.Value.SpillLocation = {SpillBase, SpillOffset};
return VL;
}
llvm_unreachable("Should have found OldML in new VarLoc.");
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 5f976bf43c5b..e6661e5135c3 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -822,9 +822,6 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// register that hasn't been defined yet. If we do not remove those here, then
// the re-insertion of the DBG_VALUE instruction after register allocation
// will be incorrect.
- // TODO: If earlier passes are corrected to generate sane debug information
- // (and if the machine verifier is improved to catch this), then these checks
- // could be removed or replaced by asserts.
bool Discard = false;
for (const MachineOperand &Op : MI.debug_operands()) {
if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
@@ -1341,8 +1338,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
bool DidChange = false;
LocMap::iterator LocMapI;
LocMapI.setMap(locInts);
- for (unsigned i = 0; i != NewRegs.size(); ++i) {
- LiveInterval *LI = &LIS.getInterval(NewRegs[i]);
+ for (Register NewReg : NewRegs) {
+ LiveInterval *LI = &LIS.getInterval(NewReg);
if (LI->empty())
continue;
@@ -1500,8 +1497,8 @@ void LDVImpl::splitRegister(Register OldReg, ArrayRef<Register> NewRegs) {
// Map all of the new virtual registers.
UserValue *UV = lookupVirtReg(OldReg);
- for (unsigned i = 0; i != NewRegs.size(); ++i)
- mapVirtReg(NewRegs[i], UV);
+ for (Register NewReg : NewRegs)
+ mapVirtReg(NewReg, UV);
}
void LiveDebugVariables::
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.h b/llvm/lib/CodeGen/LiveDebugVariables.h
index 07dd3a83866f..9998ce9e8dad 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -56,6 +56,11 @@ private:
bool runOnMachineFunction(MachineFunction &) override;
void releaseMemory() override;
void getAnalysisUsage(AnalysisUsage &) const override;
+
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 6380c4bfd6e6..05768140cbdf 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -133,6 +133,22 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
if (OVNI != li.getVNInfoAt(UseIdx))
return false;
+
+ // Check that subrange is live at UseIdx.
+ if (MO.getSubReg()) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ LaneBitmask LM = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ for (LiveInterval::SubRange &SR : li.subranges()) {
+ if ((SR.LaneMask & LM).none())
+ continue;
+ if (!SR.liveAt(UseIdx))
+ return false;
+ // Early exit if all used lanes are checked. No need to continue.
+ LM &= ~SR.LaneMask;
+ if (LM.none())
+ break;
+ }
+ }
}
return true;
}
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index e8744797707b..94bdfab5e5e0 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -141,8 +141,8 @@ void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB,
}
#ifndef NDEBUG
- for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
- assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+ for (MachineInstr *Kill : VRInfo.Kills)
+ assert(Kill->getParent() != MBB && "entry should be at end!");
#endif
// This situation can occur:
@@ -534,8 +534,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
MachineBasicBlock *MBB = MI.getParent();
// Process all uses.
- for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
- unsigned MOReg = UseRegs[i];
+ for (unsigned MOReg : UseRegs) {
if (Register::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
else if (!MRI->isReserved(MOReg))
@@ -543,12 +542,11 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
}
// Process all masked registers. (Call clobbers).
- for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
- HandleRegMask(MI.getOperand(RegMasks[i]));
+ for (unsigned Mask : RegMasks)
+ HandleRegMask(MI.getOperand(Mask));
// Process all defs.
- for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
- unsigned MOReg = DefRegs[i];
+ for (unsigned MOReg : DefRegs) {
if (Register::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
else if (!MRI->isReserved(MOReg))
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index ee2387d1e8e6..37fd3e4853ac 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -210,7 +210,11 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign);
+ // Only place the stack protector in the local stack area if the target
+ // allows it.
+ if (TFI.isStackIdSafeForLocalArea(MFI.getStackID(StackProtectorFI)))
+ AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown,
+ MaxAlign);
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 6221b5929301..d0323eaf3d78 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -350,18 +350,33 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
bool HasPHI = false;
bool HasInlineAsm = false;
+ bool AllTiedOpsRewritten = true, HasTiedOps = false;
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
if (MI.isPHI())
HasPHI = true;
if (MI.isInlineAsm())
HasInlineAsm = true;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ unsigned DefIdx;
+ if (MO.isUse() && MI.isRegTiedToDefOperand(I, &DefIdx)) {
+ HasTiedOps = true;
+ if (MO.getReg() != MI.getOperand(DefIdx).getReg())
+ AllTiedOpsRewritten = false;
+ }
+ }
}
}
if (!HasPHI)
Properties.set(MachineFunctionProperties::Property::NoPHIs);
MF.setHasInlineAsm(HasInlineAsm);
+ if (HasTiedOps && AllTiedOpsRewritten)
+ Properties.set(MachineFunctionProperties::Property::TiedOpsRewritten);
+
if (isSSA(MF))
Properties.set(MachineFunctionProperties::Property::IsSSA);
else
@@ -457,6 +472,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (YamlMF.FailsVerification)
MF.getProperties().set(
MachineFunctionProperties::Property::FailsVerification);
+ if (YamlMF.TracksDebugUserValues)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
if (parseRegisterInfo(PFS, YamlMF))
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index f1369396e37f..dc72f83ad0e4 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -219,6 +219,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel);
YamlMF.FailsVerification = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailsVerification);
+ YamlMF.TracksDebugUserValues = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
MachineModuleSlotTracker MST(&MF);
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 23c511aaa056..8c9d00d08c6a 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -193,7 +193,7 @@ void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) {
assert(!MI->getParent() && "MI is still in a block!");
- Parent->getParent()->DeleteMachineInstr(MI);
+ Parent->getParent()->deleteMachineInstr(MI);
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
@@ -1038,16 +1038,15 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// Collect a list of virtual registers killed by the terminators.
SmallVector<Register, 4> KilledRegs;
if (LV)
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I) {
- MachineInstr *MI = &*I;
- for (MachineOperand &MO : MI->operands()) {
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end())) {
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
MO.isUndef())
continue;
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg) ||
- LV->getVarInfo(Reg).removeKill(*MI)) {
+ LV->getVarInfo(Reg).removeKill(MI)) {
KilledRegs.push_back(Reg);
LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI);
MO.setIsKill(false);
@@ -1057,11 +1056,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
SmallVector<Register, 4> UsedRegs;
if (LIS) {
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I) {
- MachineInstr *MI = &*I;
-
- for (const MachineOperand &MO : MI->operands()) {
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end())) {
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.getReg() == 0)
continue;
@@ -1078,9 +1075,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// SlotIndexes.
SmallVector<MachineInstr*, 4> Terminators;
if (Indexes) {
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I)
- Terminators.push_back(&*I);
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end()))
+ Terminators.push_back(&MI);
}
// Since we replaced all uses of Succ with NMBB, that should also be treated
@@ -1091,9 +1088,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
if (Indexes) {
SmallVector<MachineInstr*, 4> NewTerminators;
- for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
- I != E; ++I)
- NewTerminators.push_back(&*I);
+ for (MachineInstr &MI :
+ llvm::make_range(getFirstInstrTerminator(), instr_end()))
+ NewTerminators.push_back(&MI);
for (MachineInstr *Terminator : Terminators) {
if (!is_contained(NewTerminators, Terminator))
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 8a1b4031642d..692587cd58fa 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -61,6 +61,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/CodeLayout.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -193,6 +194,11 @@ static cl::opt<unsigned> TriangleChainCount(
cl::init(2),
cl::Hidden);
+static cl::opt<bool> EnableExtTspBlockPlacement(
+ "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
+ cl::desc("Enable machine block placement based on the ext-tsp model, "
+ "optimizing I-cache utilization."));
+
namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -557,6 +563,15 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// but a local analysis would not find them.
void precomputeTriangleChains();
+ /// Apply a post-processing step optimizing block placement.
+ void applyExtTsp();
+
+ /// Modify the existing block placement in the function and adjust all jumps.
+ void assignBlockOrder(const std::vector<const MachineBasicBlock *> &NewOrder);
+
+ /// Create a single CFG chain from the current block order.
+ void createCFGChainExtTsp();
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -3387,6 +3402,15 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Apply a post-processing optimizing block placement.
+ if (MF.size() >= 3 && EnableExtTspBlockPlacement) {
+ // Find a new placement and modify the layout of the blocks in the function.
+ applyExtTsp();
+
+ // Re-create CFG chain so that we can optimizeBranches and alignBlocks.
+ createCFGChainExtTsp();
+ }
+
optimizeBranches();
alignBlocks();
@@ -3413,12 +3437,147 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MBFI->view("MBP." + MF.getName(), false);
}
-
// We always return true as we have no way to track whether the final order
// differs from the original order.
return true;
}
+void MachineBlockPlacement::applyExtTsp() {
+ // Prepare data; blocks are indexed by their index in the current ordering.
+ DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
+ BlockIndex.reserve(F->size());
+ std::vector<const MachineBasicBlock *> CurrentBlockOrder;
+ CurrentBlockOrder.reserve(F->size());
+ size_t NumBlocks = 0;
+ for (const MachineBasicBlock &MBB : *F) {
+ BlockIndex[&MBB] = NumBlocks++;
+ CurrentBlockOrder.push_back(&MBB);
+ }
+
+ auto BlockSizes = std::vector<uint64_t>(F->size());
+ auto BlockCounts = std::vector<uint64_t>(F->size());
+ DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> JumpCounts;
+ for (MachineBasicBlock &MBB : *F) {
+ // Getting the block frequency.
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ BlockCounts[BlockIndex[&MBB]] = BlockFreq.getFrequency();
+ // Getting the block size:
+ // - approximate the size of an instruction by 4 bytes, and
+ // - ignore debug instructions.
+ // Note: getting the exact size of each block is target-dependent and can be
+ // done by extending the interface of MCCodeEmitter. Experimentally we do
+ // not see a perf improvement with the exact block sizes.
+ auto NonDbgInsts =
+ instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end());
+ int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
+ BlockSizes[BlockIndex[&MBB]] = 4 * NumInsts;
+ // Getting jump frequencies.
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ auto EP = MBPI->getEdgeProbability(&MBB, Succ);
+ BlockFrequency EdgeFreq = BlockFreq * EP;
+ auto Edge = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
+ JumpCounts[Edge] = EdgeFreq.getFrequency();
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Applying ext-tsp layout for |V| = " << F->size()
+ << " with profile = " << F->getFunction().hasProfileData()
+ << " (" << F->getName().str() << ")"
+ << "\n");
+ LLVM_DEBUG(
+ dbgs() << format(" original layout score: %0.2f\n",
+ calcExtTspScore(BlockSizes, BlockCounts, JumpCounts)));
+
+ // Run the layout algorithm.
+ auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
+ std::vector<const MachineBasicBlock *> NewBlockOrder;
+ NewBlockOrder.reserve(F->size());
+ for (uint64_t Node : NewOrder) {
+ NewBlockOrder.push_back(CurrentBlockOrder[Node]);
+ }
+ LLVM_DEBUG(dbgs() << format(" optimized layout score: %0.2f\n",
+ calcExtTspScore(NewOrder, BlockSizes, BlockCounts,
+ JumpCounts)));
+
+ // Assign new block order.
+ assignBlockOrder(NewBlockOrder);
+}
+
+void MachineBlockPlacement::assignBlockOrder(
+ const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
+ assert(F->size() == NewBlockOrder.size() && "Incorrect size of block order");
+ F->RenumberBlocks();
+
+ bool HasChanges = false;
+ for (size_t I = 0; I < NewBlockOrder.size(); I++) {
+ if (NewBlockOrder[I] != F->getBlockNumbered(I)) {
+ HasChanges = true;
+ break;
+ }
+ }
+ // Stop early if the new block order is identical to the existing one.
+ if (!HasChanges)
+ return;
+
+ SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(F->getNumBlockIDs());
+ for (auto &MBB : *F) {
+ PrevFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+ }
+
+ // Sort basic blocks in the function according to the computed order.
+ DenseMap<const MachineBasicBlock *, size_t> NewIndex;
+ for (const MachineBasicBlock *MBB : NewBlockOrder) {
+ NewIndex[MBB] = NewIndex.size();
+ }
+ F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
+ return NewIndex[&L] < NewIndex[&R];
+ });
+
+ // Update basic block branches by inserting explicit fallthrough branches
+ // when required and re-optimize branches when possible.
+ const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (auto &MBB : *F) {
+ MachineFunction::iterator NextMBB = std::next(MBB.getIterator());
+ MachineFunction::iterator EndIt = MBB.getParent()->end();
+ auto *FTMBB = PrevFallThroughs[MBB.getNumber()];
+ // If this block had a fallthrough before we need an explicit unconditional
+ // branch to that block if the fallthrough block is not adjacent to the
+ // block in the new order.
+ if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
+ TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc());
+ }
+
+ // It might be possible to optimize branches by flipping the condition.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
+ continue;
+ MBB.updateTerminator(FTMBB);
+ }
+
+#ifndef NDEBUG
+ // Make sure we correctly constructed all branches.
+ F->verify(this, "After optimized block reordering");
+#endif
+}
+
+void MachineBlockPlacement::createCFGChainExtTsp() {
+ BlockToChain.clear();
+ ComputedEdges.clear();
+ ChainAllocator.DestroyAll();
+
+ MachineBasicBlock *HeadBB = &F->front();
+ BlockChain *FunctionChain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, HeadBB);
+
+ for (MachineBasicBlock &MBB : *F) {
+ if (HeadBB == &MBB)
+ continue; // Ignore head of the chain
+ FunctionChain->merge(&MBB, nullptr);
+ }
+}
+
namespace {
/// A pass to compute block placement statistics.
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index e2b6cfe55c16..72ab9ee4f388 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -485,7 +485,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
for (auto *InstrPtr : DelInstrs) {
- InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+ InstrPtr->eraseFromParent();
// Erase all LiveRegs defined by the removed instruction
for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
if (I->MI == InstrPtr)
@@ -693,7 +693,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// use for them.
MachineFunction *MF = MBB->getParent();
for (auto *InstrPtr : InsInstrs)
- MF->DeleteMachineInstr(InstrPtr);
+ MF->deleteMachineInstr(InstrPtr);
}
InstrIdxForVirtReg.clear();
}
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 7c83bacd80d9..57fbe4112e47 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -847,31 +847,27 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
<< "\n");
- for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
- I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
// Ignore non-trivial COPYs.
- if (MI->isCopy() && MI->getNumOperands() == 2 &&
- !TRI->regsOverlap(MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg())) {
+ if (MI.isCopy() && MI.getNumOperands() == 2 &&
+ !TRI->regsOverlap(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg())) {
- MCRegister Def = MI->getOperand(0).getReg().asMCReg();
- MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+ MCRegister Def = MI.getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI.getOperand(1).getReg().asMCReg();
// Unlike forward cp, we don't invoke propagateDefs here,
// just let forward cp do COPY-to-COPY propagation.
- if (isBackwardPropagatableCopy(*MI, *MRI)) {
+ if (isBackwardPropagatableCopy(MI, *MRI)) {
Tracker.invalidateRegister(Src, *TRI);
Tracker.invalidateRegister(Def, *TRI);
- Tracker.trackCopy(MI, *TRI);
+ Tracker.trackCopy(&MI, *TRI);
continue;
}
}
// Invalidate any earlyclobber regs first.
- for (const MachineOperand &MO : MI->operands())
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isEarlyClobber()) {
MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
@@ -879,8 +875,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Tracker.invalidateRegister(Reg, *TRI);
}
- propagateDefs(*MI);
- for (const MachineOperand &MO : MI->operands()) {
+ propagateDefs(MI);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
@@ -898,7 +894,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid();
++RUI) {
if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) {
- CopyDbgUsers[Copy].insert(MI);
+ CopyDbgUsers[Copy].insert(&MI);
}
}
} else {
diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
new file mode 100644
index 000000000000..42a5e2b7af01
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -0,0 +1,113 @@
+//===- MachineCycleAnalysis.cpp - Compute CycleInfo for Machine IR --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
+#include "llvm/ADT/GenericCycleImpl.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+template class llvm::GenericCycleInfo<llvm::MachineSSAContext>;
+template class llvm::GenericCycle<llvm::MachineSSAContext>;
+
+namespace {
+
+/// Legacy analysis pass which computes a \ref MachineCycleInfo.
+class MachineCycleInfoWrapperPass : public MachineFunctionPass {
+ MachineFunction *F = nullptr;
+ MachineCycleInfo CI;
+
+public:
+ static char ID;
+
+ MachineCycleInfoWrapperPass();
+
+ MachineCycleInfo &getCycleInfo() { return CI; }
+ const MachineCycleInfo &getCycleInfo() const { return CI; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+ // TODO: verify analysis
+};
+
+class MachineCycleInfoPrinterPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineCycleInfoPrinterPass();
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // namespace
+
+char MachineCycleInfoWrapperPass::ID = 0;
+
+MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineCycleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineCycleInfoWrapperPass, "machine-cycles",
+ "Machine Cycle Info Analysis", true, true)
+INITIALIZE_PASS_END(MachineCycleInfoWrapperPass, "machine-cycles",
+ "Machine Cycle Info Analysis", true, true)
+
+void MachineCycleInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineCycleInfoWrapperPass::runOnMachineFunction(MachineFunction &Func) {
+ CI.clear();
+
+ F = &Func;
+ CI.compute(Func);
+ return false;
+}
+
+void MachineCycleInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
+ OS << "MachineCycleInfo for function: " << F->getName() << "\n";
+ CI.print(OS);
+}
+
+void MachineCycleInfoWrapperPass::releaseMemory() {
+ CI.clear();
+ F = nullptr;
+}
+
+char MachineCycleInfoPrinterPass::ID = 0;
+
+MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineCycleInfoPrinterPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineCycleInfoPrinterPass, "print-machine-cycles",
+ "Print Machine Cycle Info Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_END(MachineCycleInfoPrinterPass, "print-machine-cycles",
+ "Print Machine Cycle Info Analysis", true, true)
+
+void MachineCycleInfoPrinterPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) {
+ auto &CI = getAnalysis<MachineCycleInfoWrapperPass>();
+ CI.print(errs());
+ return false;
+}
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 310c2721c3bd..81ed3d0e93ff 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -89,6 +89,7 @@ static cl::opt<unsigned> AlignAllFunctions(
static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
using P = MachineFunctionProperties::Property;
+ // clang-format off
switch(Prop) {
case P::FailedISel: return "FailedISel";
case P::IsSSA: return "IsSSA";
@@ -100,7 +101,9 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
case P::TracksLiveness: return "TracksLiveness";
case P::TiedOpsRewritten: return "TiedOpsRewritten";
case P::FailsVerification: return "FailsVerification";
+ case P::TracksDebugUserValues: return "TracksDebugUserValues";
}
+ // clang-format on
llvm_unreachable("Invalid machine function property");
}
@@ -125,7 +128,7 @@ void MachineFunctionProperties::print(raw_ostream &OS) const {
MachineFunctionInfo::~MachineFunctionInfo() = default;
void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
- MBB->getParent()->DeleteMachineBasicBlock(MBB);
+ MBB->getParent()->deleteMachineBasicBlock(MBB);
}
static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
@@ -347,10 +350,10 @@ void MachineFunction::assignBeginEndSections() {
/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
- const DebugLoc &DL,
+ DebugLoc DL,
bool NoImplicit) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(*this, MCID, DL, NoImplicit);
+ MachineInstr(*this, MCID, std::move(DL), NoImplicit);
}
/// Create a new MachineInstr which is a copy of the 'Orig' instruction,
@@ -361,8 +364,9 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
MachineInstr(*this, *Orig);
}
-MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) {
+MachineInstr &MachineFunction::cloneMachineInstrBundle(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const MachineInstr &Orig) {
MachineInstr *FirstClone = nullptr;
MachineBasicBlock::const_instr_iterator I = Orig.getIterator();
while (true) {
@@ -390,8 +394,7 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
///
/// This function also serves as the MachineInstr destructor - the real
/// ~MachineInstr() destructor must be empty.
-void
-MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
// Verify that a call site info is at valid state. This assertion should
// be triggered during the implementation of support for the
// call site info of a new architecture. If the assertion is triggered,
@@ -418,8 +421,7 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
}
/// Delete the given MachineBasicBlock.
-void
-MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+void MachineFunction::deleteMachineBasicBlock(MachineBasicBlock *MBB) {
assert(MBB->getParent() == this && "MBB parent mismatch!");
// Clean up any references to MBB in jump tables before deleting it.
if (JumpTableInfo)
@@ -769,8 +771,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad,
ArrayRef<const GlobalValue *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- for (unsigned N = TyInfo.size(); N; --N)
- LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+ for (const GlobalValue *GV : llvm::reverse(TyInfo))
+ LP.TypeIds.push_back(getTypeIDFor(GV));
}
void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
@@ -1404,10 +1406,10 @@ MachineConstantPool::~MachineConstantPool() {
// A constant may be a member of both Constants and MachineCPVsSharingEntries,
// so keep track of which we've deleted to avoid double deletions.
DenseSet<MachineConstantPoolValue*> Deleted;
- for (unsigned i = 0, e = Constants.size(); i != e; ++i)
- if (Constants[i].isMachineConstantPoolEntry()) {
- Deleted.insert(Constants[i].Val.MachineCPVal);
- delete Constants[i].Val.MachineCPVal;
+ for (const MachineConstantPoolEntry &C : Constants)
+ if (C.isMachineConstantPoolEntry()) {
+ Deleted.insert(C.Val.MachineCPVal);
+ delete C.Val.MachineCPVal;
}
for (MachineConstantPoolValue *CPV : MachineCPVsSharingEntries) {
if (Deleted.count(CPV) == 0)
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index aaa80432d2f2..85b266afceef 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -115,10 +115,10 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
-MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
- DebugLoc dl, bool NoImp)
- : MCID(&tid), debugLoc(std::move(dl)), DebugInstrNum(0) {
- assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
+ DebugLoc DL, bool NoImp)
+ : MCID(&TID), DbgLoc(std::move(DL)), DebugInstrNum(0) {
+ assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
if (unsigned NumOps = MCID->getNumOperands() +
@@ -135,9 +135,9 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// Does not copy the number from debug instruction numbering, to preserve
/// uniqueness.
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()),
+ : MCID(&MI.getDesc()), Info(MI.Info), DbgLoc(MI.getDebugLoc()),
DebugInstrNum(0) {
- assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+ assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
Operands = MF.allocateOperandArray(CapOperands);
@@ -682,26 +682,6 @@ void MachineInstr::eraseFromParent() {
getParent()->erase(this);
}
-void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
- assert(getParent() && "Not embedded in a basic block!");
- MachineBasicBlock *MBB = getParent();
- MachineFunction *MF = MBB->getParent();
- assert(MF && "Not embedded in a function!");
-
- MachineInstr *MI = (MachineInstr *)this;
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
- Register Reg = MO.getReg();
- if (!Reg.isVirtual())
- continue;
- MRI.markUsesInDebugValueAsUndef(Reg);
- }
- MI->eraseFromParent();
-}
-
void MachineInstr::eraseFromBundle() {
assert(getParent() && "Not embedded in a basic block!");
getParent()->erase_instr(this);
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 8d6459a627fa..762395542b40 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -649,7 +649,7 @@ void SwingSchedulerDAG::schedule() {
/// Clean up after the software pipeliner runs.
void SwingSchedulerDAG::finishBlock() {
for (auto &KV : NewMIs)
- MF.DeleteMachineInstr(KV.second);
+ MF.deleteMachineInstr(KV.second);
NewMIs.clear();
// Call the superclass.
@@ -1101,17 +1101,15 @@ unsigned SwingSchedulerDAG::calculateResMII() {
// Sort the instructions by the number of available choices for scheduling,
// least to most. Use the number of critical resources as the tie breaker.
FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
- for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
- E = MBB->getFirstTerminator();
- I != E; ++I)
- FUS.calcCriticalResources(*I);
+ for (MachineInstr &MI :
+ llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
+ FUS.calcCriticalResources(MI);
PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
FuncUnitOrder(FUS);
- for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
- E = MBB->getFirstTerminator();
- I != E; ++I)
- FuncUnitOrder.push(&*I);
+ for (MachineInstr &MI :
+ llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
+ FuncUnitOrder.push(&MI);
while (!FuncUnitOrder.empty()) {
MachineInstr *MI = FuncUnitOrder.top();
@@ -1192,14 +1190,10 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
/// but we do this to find the circuits, and then change them back.
static void swapAntiDependences(std::vector<SUnit> &SUnits) {
SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- SUnit *SU = &SUnits[i];
- for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
- IP != EP; ++IP) {
- if (IP->getKind() != SDep::Anti)
- continue;
- DepsAdded.push_back(std::make_pair(SU, *IP));
- }
+ for (SUnit &SU : SUnits) {
+ for (SDep &Pred : SU.Preds)
+ if (Pred.getKind() == SDep::Anti)
+ DepsAdded.push_back(std::make_pair(&SU, Pred));
}
for (std::pair<SUnit *, SDep> &P : DepsAdded) {
// Remove this anti dependency and add one in the reverse direction.
@@ -1471,27 +1465,23 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
}
// Compute ALAP, ZeroLatencyHeight, and MOV.
- for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
- E = Topo.rend();
- I != E; ++I) {
+ for (int I : llvm::reverse(Topo)) {
int alap = maxASAP;
int zeroLatencyHeight = 0;
- SUnit *SU = &SUnits[*I];
- for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
- ES = SU->Succs.end();
- IS != ES; ++IS) {
- SUnit *succ = IS->getSUnit();
- if (IS->getLatency() == 0)
+ SUnit *SU = &SUnits[I];
+ for (const SDep &S : SU->Succs) {
+ SUnit *succ = S.getSUnit();
+ if (S.getLatency() == 0)
zeroLatencyHeight =
std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
- if (ignoreDependence(*IS, true))
+ if (ignoreDependence(S, true))
continue;
- alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
- getDistance(SU, succ, *IS) * MII));
+ alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
+ getDistance(SU, succ, S) * MII));
}
- ScheduleInfo[*I].ALAP = alap;
- ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
+ ScheduleInfo[I].ALAP = alap;
+ ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
}
// After computing the node functions, compute the summary for each node set.
@@ -1548,9 +1538,8 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
SmallSetVector<SUnit *, 8> &Succs,
const NodeSet *S = nullptr) {
Succs.clear();
- for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
- I != E; ++I) {
- for (SDep &Succ : (*I)->Succs) {
+ for (const SUnit *SU : NodeOrder) {
+ for (const SDep &Succ : SU->Succs) {
if (S && S->count(Succ.getSUnit()) == 0)
continue;
if (ignoreDependence(Succ, false))
@@ -1558,7 +1547,7 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
if (NodeOrder.count(Succ.getSUnit()) == 0)
Succs.insert(Succ.getSUnit());
}
- for (SDep &Pred : (*I)->Preds) {
+ for (const SDep &Pred : SU->Preds) {
if (Pred.getKind() != SDep::Anti)
continue;
if (S && S->count(Pred.getSUnit()) == 0)
@@ -2202,7 +2191,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
MachineInstr *NewMI = MF.CloneMachineInstr(MI);
NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
- MF.DeleteMachineInstr(NewMI);
+ MF.deleteMachineInstr(NewMI);
if (!Disjoint)
return false;
@@ -2885,10 +2874,8 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
++stage) {
std::deque<SUnit *> &cycleInstrs =
ScheduledInstrs[cycle + (stage * InitiationInterval)];
- for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
- E = cycleInstrs.rend();
- I != E; ++I)
- ScheduledInstrs[cycle].push_front(*I);
+ for (SUnit *SU : llvm::reverse(cycleInstrs))
+ ScheduledInstrs[cycle].push_front(SU);
}
}
@@ -2899,10 +2886,8 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
// Change the registers in instruction as specified in the InstrChanges
// map. We need to use the new registers to create the correct order.
- for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
- SUnit *SU = &SSD->SUnits[i];
- SSD->applyInstrChange(SU->getInstr(), *this);
- }
+ for (const SUnit &SU : SSD->SUnits)
+ SSD->applyInstrChange(SU.getInstr(), *this);
// Reorder the instructions in each cycle to fix and improve the
// generated code.
diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp
new file mode 100644
index 000000000000..8db893535daf
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -0,0 +1,52 @@
+//===- MachineSSAContext.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a specialization of the GenericSSAContext<X>
+/// template class for Machine IR.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) {
+ return &F.front();
+}
+
+void MachineSSAContext::setFunction(MachineFunction &Fn) {
+ MF = &Fn;
+ RegInfo = &MF->getRegInfo();
+}
+
+Printable MachineSSAContext::print(MachineBasicBlock *Block) const {
+ return Printable([Block](raw_ostream &Out) { Block->printName(Out); });
+}
+
+Printable MachineSSAContext::print(MachineInstr *I) const {
+ return Printable([I](raw_ostream &Out) { I->print(Out); });
+}
+
+Printable MachineSSAContext::print(Register Value) const {
+ auto *MRI = RegInfo;
+ return Printable([MRI, Value](raw_ostream &Out) {
+ Out << printReg(Value, MRI->getTargetRegisterInfo(), 0, MRI);
+
+ if (Value) {
+ // Try to print the definition.
+ if (auto *Instr = MRI->getUniqueVRegDef(Value)) {
+ Out << ": ";
+ Instr->print(Out);
+ }
+ }
+ });
+}
diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 930677e4fd7d..48076663ddf5 100644
--- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -126,7 +126,9 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
}
/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
-/// is live in the middle of the specified block.
+/// is live in the middle of the specified block. If ExistingValueOnly is
+/// true then this will only return an existing value or $noreg; otherwise new
+/// instructions may be inserted to materialize a value.
///
/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
/// important case: if there is a definition of the rewritten value after the
@@ -143,14 +145,18 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
/// their respective blocks. However, the use of X happens in the *middle* of
/// a block. Because of this, we need to insert a new PHI node in SomeBB to
/// merge the appropriate values, and this value isn't live out of the block.
-Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB,
+ bool ExistingValueOnly) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
if (!HasValueForBlock(BB))
- return GetValueAtEndOfBlockInternal(BB);
+ return GetValueAtEndOfBlockInternal(BB, ExistingValueOnly);
// If there are no predecessors, just return undef.
if (BB->pred_empty()) {
+ // If we cannot insert new instructions, just return $noreg.
+ if (ExistingValueOnly)
+ return Register();
// Insert an implicit_def to represent an undef value.
MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
BB, BB->getFirstTerminator(),
@@ -165,7 +171,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
bool isFirstPred = true;
for (MachineBasicBlock *PredBB : BB->predecessors()) {
- Register PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ Register PredVal = GetValueAtEndOfBlockInternal(PredBB, ExistingValueOnly);
PredValues.push_back(std::make_pair(PredBB, PredVal));
// Compute SingularValue.
@@ -185,6 +191,10 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
if (DupPHI)
return DupPHI;
+ // If we cannot create new instructions, return $noreg now.
+ if (ExistingValueOnly)
+ return Register();
+
// Otherwise, we do need a PHI: insert one now.
MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
@@ -350,10 +360,13 @@ public:
/// for the specified BB and if so, return it. If not, construct SSA form by
/// first calculating the required placement of PHIs and then inserting new
/// PHIs where needed.
-Register MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+Register
+MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB,
+ bool ExistingValueOnly) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
- if (Register V = AvailableVals[BB])
- return V;
+ Register ExistingVal = AvailableVals.lookup(BB);
+ if (ExistingVal || ExistingValueOnly)
+ return ExistingVal;
SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
return Impl.GetValue(BB);
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 47d40f0823c8..b043d4c1b0c1 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -90,12 +90,17 @@ cl::opt<bool> VerifyScheduling(
"verify-misched", cl::Hidden,
cl::desc("Verify machine instrs before and after machine scheduling"));
+#ifndef NDEBUG
+cl::opt<bool> ViewMISchedDAGs(
+ "view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+#else
+const bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
} // end namespace llvm
#ifndef NDEBUG
-static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
- cl::desc("Pop up a window to show MISched dags after they are processed"));
-
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph, provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
@@ -111,7 +116,6 @@ static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
cl::desc("Print schedule DAGs"));
#else
-static const bool ViewMISchedDAGs = false;
static const bool PrintDAGs = false;
#endif // NDEBUG
@@ -561,11 +565,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
MBBRegionsVector MBBRegions;
getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
- for (MBBRegionsVector::iterator R = MBBRegions.begin();
- R != MBBRegions.end(); ++R) {
- MachineBasicBlock::iterator I = R->RegionBegin;
- MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
- unsigned NumRegionInstrs = R->NumRegionInstrs;
+ for (const SchedRegion &R : MBBRegions) {
+ MachineBasicBlock::iterator I = R.RegionBegin;
+ MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
+ unsigned NumRegionInstrs = R.NumRegionInstrs;
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 8df23b781ffd..0a5ff276fedc 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -80,9 +80,9 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
void MachineTraceMetrics::releaseMemory() {
MF = nullptr;
BlockInfo.clear();
- for (unsigned i = 0; i != TS_NumStrategies; ++i) {
- delete Ensembles[i];
- Ensembles[i] = nullptr;
+ for (Ensemble *&E : Ensembles) {
+ delete E;
+ E = nullptr;
}
}
@@ -398,9 +398,9 @@ void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
<< '\n');
BlockInfo[MBB->getNumber()].invalidate();
- for (unsigned i = 0; i != TS_NumStrategies; ++i)
- if (Ensembles[i])
- Ensembles[i]->invalidate(MBB);
+ for (Ensemble *E : Ensembles)
+ if (E)
+ E->invalidate(MBB);
}
void MachineTraceMetrics::verifyAnalysis() const {
@@ -408,9 +408,9 @@ void MachineTraceMetrics::verifyAnalysis() const {
return;
#ifndef NDEBUG
assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
- for (unsigned i = 0; i != TS_NumStrategies; ++i)
- if (Ensembles[i])
- Ensembles[i]->verify();
+ for (Ensemble *E : Ensembles)
+ if (E)
+ E->verify();
#endif
}
@@ -984,8 +984,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
const MachineBasicBlock *DefMBB = DefMI->getParent();
// Reg is live-in to all blocks in Trace that follow DefMBB.
- for (unsigned i = Trace.size(); i; --i) {
- const MachineBasicBlock *MBB = Trace[i-1];
+ for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
if (MBB == DefMBB)
return;
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
@@ -1204,8 +1203,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
for (unsigned K = 0; K != PRDepths.size(); ++K)
PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
} else {
- for (unsigned K = 0; K != PRDepths.size(); ++K)
- PRMax = std::max(PRMax, PRDepths[K]);
+ for (unsigned PRD : PRDepths)
+ PRMax = std::max(PRMax, PRD);
}
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 32078db76cf3..005d4ad1a328 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -101,6 +101,7 @@ namespace {
// Avoid querying the MachineFunctionProperties for each operand.
bool isFunctionRegBankSelected;
bool isFunctionSelected;
+ bool isFunctionTracksDebugUserValues;
using RegVector = SmallVector<Register, 16>;
using RegMaskVector = SmallVector<const uint32_t *, 4>;
@@ -384,6 +385,8 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
MachineFunctionProperties::Property::RegBankSelected);
isFunctionSelected = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Selected);
+ isFunctionTracksDebugUserValues = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksDebugUserValues);
LiveVars = nullptr;
LiveInts = nullptr;
@@ -1605,12 +1608,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
case TargetOpcode::G_ROTR:
case TargetOpcode::G_ROTL: {
LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
if (Src1Ty.isVector() != Src2Ty.isVector()) {
- report("Rotate requires operands to be either all scalars or all vectors",
+ report("Shifts and rotates require operands to be either all scalars or "
+ "all vectors",
MI);
break;
}
@@ -1980,41 +1987,50 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MO->isUndef())
report("Generic virtual register use cannot be undef", MO, MONum);
- // If we're post-Select, we can't have gvregs anymore.
- if (isFunctionSelected) {
- report("Generic virtual register invalid in a Selected function",
- MO, MONum);
- return;
- }
+ // Debug value instruction is permitted to use undefined vregs.
+ // This is a performance measure to skip the overhead of immediately
+ // pruning unused debug operands. The final undef substitution occurs
+ // when debug values are allocated in LDVImpl::handleDebugValue, so
+ // these verifications always apply after this pass.
+ if (isFunctionTracksDebugUserValues || !MO->isUse() ||
+ !MI->isDebugValue() || !MRI->def_empty(Reg)) {
+ // If we're post-Select, we can't have gvregs anymore.
+ if (isFunctionSelected) {
+ report("Generic virtual register invalid in a Selected function",
+ MO, MONum);
+ return;
+ }
- // The gvreg must have a type and it must not have a SubIdx.
- LLT Ty = MRI->getType(Reg);
- if (!Ty.isValid()) {
- report("Generic virtual register must have a valid type", MO,
- MONum);
- return;
- }
+ // The gvreg must have a type and it must not have a SubIdx.
+ LLT Ty = MRI->getType(Reg);
+ if (!Ty.isValid()) {
+ report("Generic virtual register must have a valid type", MO,
+ MONum);
+ return;
+ }
- const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
- // If we're post-RegBankSelect, the gvreg must have a bank.
- if (!RegBank && isFunctionRegBankSelected) {
- report("Generic virtual register must have a bank in a "
- "RegBankSelected function",
- MO, MONum);
- return;
- }
+ // If we're post-RegBankSelect, the gvreg must have a bank.
+ if (!RegBank && isFunctionRegBankSelected) {
+ report("Generic virtual register must have a bank in a "
+ "RegBankSelected function",
+ MO, MONum);
+ return;
+ }
- // Make sure the register fits into its register bank if any.
- if (RegBank && Ty.isValid() &&
- RegBank->getSize() < Ty.getSizeInBits()) {
- report("Register bank is too small for virtual register", MO,
- MONum);
- errs() << "Register bank " << RegBank->getName() << " too small("
- << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
- << "-bits\n";
- return;
+ // Make sure the register fits into its register bank if any.
+ if (RegBank && Ty.isValid() &&
+ RegBank->getSize() < Ty.getSizeInBits()) {
+ report("Register bank is too small for virtual register", MO,
+ MONum);
+ errs() << "Register bank " << RegBank->getName() << " too small("
+ << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
+ << "-bits\n";
+ return;
+ }
}
+
if (SubIdx) {
report("Generic virtual register does not allow subregister index", MO,
MONum);
@@ -2217,8 +2233,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts && Reg.isVirtual()) {
if (LiveInts->hasInterval(Reg)) {
LI = &LiveInts->getInterval(Reg);
- if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() &&
- MRI->shouldTrackSubRegLiveness(Reg))
+ if (SubRegIdx != 0 && (MO->isDef() || !MO->isUndef()) && !LI->empty() &&
+ !LI->hasSubRanges() && MRI->shouldTrackSubRegLiveness(Reg))
report("Live interval for subreg operand has no subranges", MO, MONum);
} else {
report("Virtual register has no live interval", MO, MONum);
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 77a6c37e1362..7693ab417de9 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -213,7 +213,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
for (auto &I : LoweredPHIs) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*I.first);
- MF.DeleteMachineInstr(I.first);
+ MF.deleteMachineInstr(I.first);
}
// TODO: we should use the incremental DomTree updater here.
@@ -626,7 +626,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MPhi);
- MF.DeleteMachineInstr(MPhi);
+ MF.deleteMachineInstr(MPhi);
}
}
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index b85f00a61eac..d7cd0a583cee 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -252,8 +252,8 @@ void SchedulePostRATDList::exitRegion() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
+ for (const SUnit *SU : Sequence) {
+ if (SU)
dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
@@ -531,11 +531,11 @@ void SchedulePostRATDList::ListScheduleTopDown() {
ReleaseSuccessors(&EntrySU);
// Add all leaves to Available queue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ for (SUnit &SUnit : SUnits) {
// It is available if it has no predecessors.
- if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
- AvailableQueue.push(&SUnits[i]);
- SUnits[i].isAvailable = true;
+ if (!SUnit.NumPredsLeft && !SUnit.isAvailable) {
+ AvailableQueue.push(&SUnit);
+ SUnit.isAvailable = true;
}
}
@@ -657,10 +657,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
#ifndef NDEBUG
unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
- unsigned Noops = 0;
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
+ unsigned Noops = llvm::count(Sequence, nullptr);
assert(Sequence.size() - Noops == ScheduledNodes &&
"The number of nodes scheduled doesn't match the expected number!");
#endif // NDEBUG
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 29a88480fd9f..8d8a6126dad0 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -953,12 +953,22 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// LocalStackSlotPass didn't already allocate a slot for it.
// If we are told to use the LocalStackAllocationBlock, the stack protector
// is expected to be already pre-allocated.
- if (!MFI.getUseLocalStackAllocationBlock())
+ if (MFI.getStackID(StackProtectorFI) != TargetStackID::Default) {
+ // If the stack protector isn't on the default stack then it's up to the
+ // target to set the stack offset.
+ assert(MFI.getObjectOffset(StackProtectorFI) != 0 &&
+ "Offset of stack protector on non-default stack expected to be "
+ "already set.");
+ assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) &&
+ "Stack protector on non-default stack expected to not be "
+ "pre-allocated by LocalStackSlotPass.");
+ } else if (!MFI.getUseLocalStackAllocationBlock()) {
AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
Skew);
- else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()))
+ } else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) {
llvm_unreachable(
"Stack protector not pre-allocated by LocalStackSlotPass.");
+ }
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index f605068e076d..882f8e91bf1d 100644
--- a/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -1500,8 +1500,8 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
// Erase from MaxRefs all elements in the closure.
auto Begin = MaxRefs.begin();
- for (unsigned i = ClosureIdx.size(); i != 0; --i)
- MaxRefs.erase(Begin + ClosureIdx[i-1]);
+ for (unsigned Idx : llvm::reverse(ClosureIdx))
+ MaxRefs.erase(Begin + Idx);
}
}
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
new file mode 100644
index 000000000000..9f1012c95964
--- /dev/null
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -0,0 +1,121 @@
+//===- RegAllocEvictionAdvisor.cpp - eviction advisor ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the default eviction advisor and of the Analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocEvictionAdvisor.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode(
+ "regalloc-enable-advisor", cl::Hidden,
+ cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default),
+ cl::desc("Enable regalloc advisor mode"),
+ cl::values(
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default,
+ "default", "Default"),
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release,
+ "release", "precompiled"),
+ clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development,
+ "development", "for training")));
+
+static cl::opt<bool> EnableLocalReassignment(
+ "enable-local-reassign", cl::Hidden,
+ cl::desc("Local reassignment can yield better allocation decisions, but "
+ "may be compile time intensive"),
+ cl::init(false));
+
+#define DEBUG_TYPE "regalloc"
+
+char RegAllocEvictionAdvisorAnalysis::ID = 0;
+INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict",
+ "Regalloc eviction policy", false, true)
+
+namespace {
+class DefaultEvictionAdvisorAnalysis final
+ : public RegAllocEvictionAdvisorAnalysis {
+public:
+ DefaultEvictionAdvisorAnalysis(bool NotAsRequested)
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Default),
+ NotAsRequested(NotAsRequested) {}
+
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Default;
+ }
+
+private:
+ std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo) override {
+ return std::make_unique<DefaultEvictionAdvisor>(MF, Matrix, LIS, VRM,
+ RegClassInfo, ExtraInfo);
+ }
+ bool doInitialization(Module &M) override {
+ if (NotAsRequested)
+ M.getContext().emitError("Requested regalloc eviction advisor analysis "
+ "could not be created. Using default");
+ return RegAllocEvictionAdvisorAnalysis::doInitialization(M);
+ }
+ const bool NotAsRequested;
+};
+} // namespace
+
+template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() {
+ Pass *Ret = nullptr;
+ switch (Mode) {
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default:
+ Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false);
+ break;
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development:
+ // TODO(mtrofin): add implementation
+ break;
+ case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release:
+ // TODO(mtrofin): add implementation
+ break;
+ }
+ if (Ret)
+ return Ret;
+ return new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ true);
+}
+
+StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
+ switch (getAdvisorMode()) {
+ case AdvisorMode::Default:
+ return "Default Regalloc Eviction Advisor";
+ case AdvisorMode::Release:
+ return "Release mode Regalloc Eviction Advisor";
+ case AdvisorMode::Development:
+ return "Development mode Regalloc Eviction Advisor";
+ }
+ llvm_unreachable("Unknown advisor kind");
+}
+
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(
+ const MachineFunction &MF, LiveRegMatrix *Matrix, LiveIntervals *LIS,
+ VirtRegMap *VRM, const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo)
+ : MF(MF), Matrix(Matrix), LIS(LIS), VRM(VRM), MRI(&VRM->getRegInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RegClassInfo),
+ RegCosts(TRI->getRegisterCosts(MF)), ExtraInfo(ExtraInfo),
+ EnableLocalReassign(EnableLocalReassignment ||
+ MF.getSubtarget().enableRALocalReassignment(
+ MF.getTarget().getOptLevel())) {}
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 85fd3207888b..debb75ed5020 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -85,6 +86,215 @@ struct EvictionCost {
std::tie(O.BrokenHints, O.MaxWeight);
}
};
+
+/// Track allocation stage and eviction loop prevention during allocation.
+// TODO(mtrofin): Consider exposing RAGreedy in a header instead, and folding
+// this back into it.
+class ExtraRegInfo final {
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage = RS_New;
+
+ // Cascade - Eviction loop prevention. See
+ // canEvictInterferenceBasedOnCost().
+ unsigned Cascade = 0;
+
+ RegInfo() = default;
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> Info;
+ unsigned NextCascade = 1;
+
+public:
+ ExtraRegInfo() = default;
+ ExtraRegInfo(const ExtraRegInfo &) = delete;
+
+ LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; }
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return getStage(VirtReg.reg());
+ }
+
+ void setStage(Register Reg, LiveRangeStage Stage) {
+ Info.grow(Reg.id());
+ Info[Reg].Stage = Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ setStage(VirtReg.reg(), Stage);
+ }
+
+ /// Return the current stage of the register, if present, otherwise initialize
+ /// it and return that.
+ LiveRangeStage getOrInitStage(Register Reg) {
+ Info.grow(Reg.id());
+ return getStage(Reg);
+ }
+
+ unsigned getCascade(Register Reg) const { return Info[Reg].Cascade; }
+
+ void setCascade(Register Reg, unsigned Cascade) {
+ Info.grow(Reg.id());
+ Info[Reg].Cascade = Cascade;
+ }
+
+ unsigned getOrAssignNewCascade(Register Reg) {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade) {
+ Cascade = NextCascade++;
+ setCascade(Reg, Cascade);
+ }
+ return Cascade;
+ }
+
+ unsigned getCascadeOrCurrentNext(Register Reg) const {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade)
+ Cascade = NextCascade;
+ return Cascade;
+ }
+
+ template <typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ for (; Begin != End; ++Begin) {
+ Register Reg = *Begin;
+ Info.grow(Reg.id());
+ if (Info[Reg].Stage == RS_New)
+ Info[Reg].Stage = NewStage;
+ }
+ }
+ void LRE_DidCloneVirtReg(Register New, Register Old);
+};
+
+/// Interface to the eviction advisor, which is responsible for making a
+/// decision as to which live ranges should be evicted (if any).
+class RegAllocEvictionAdvisor {
+public:
+ RegAllocEvictionAdvisor(const RegAllocEvictionAdvisor &) = delete;
+ RegAllocEvictionAdvisor(RegAllocEvictionAdvisor &&) = delete;
+ virtual ~RegAllocEvictionAdvisor() = default;
+
+ /// Find a physical register that can be freed by evicting the FixedRegisters,
+ /// or return NoRegister. The eviction decision is assumed to be correct (i.e.
+ /// no fixed live ranges are evicted) and profitable.
+ virtual MCRegister
+ tryFindEvictionCandidate(LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const = 0;
+
+ /// Find out if we can evict the live ranges occupying the given PhysReg,
+ /// which is a hint (preferred register) for VirtReg.
+ virtual bool
+ canEvictHintInterference(LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const = 0;
+
+ /// Returns true if the given \p PhysReg is a callee saved register and has
+ /// not been used for allocation yet.
+ bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
+
+protected:
+ RegAllocEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo);
+
+ Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
+
+ const MachineFunction &MF;
+ LiveRegMatrix *const Matrix;
+ LiveIntervals *const LIS;
+ VirtRegMap *const VRM;
+ MachineRegisterInfo *const MRI;
+ const TargetRegisterInfo *const TRI;
+ const RegisterClassInfo &RegClassInfo;
+ const ArrayRef<uint8_t> RegCosts;
+ ExtraRegInfo *const ExtraInfo;
+
+ /// Run or not the local reassignment heuristic. This information is
+ /// obtained from the TargetSubtargetInfo.
+ const bool EnableLocalReassign;
+
+private:
+ unsigned NextCascade = 1;
+};
+
+/// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it
+/// as an analysis to decouple the user from the implementation insofar as
+/// dependencies on other analyses goes. The motivation for it being an
+/// immutable pass is twofold:
+/// - in the ML implementation case, the evaluator is stateless but (especially
+/// in the development mode) expensive to set up. With an immutable pass, we set
+/// it up once.
+/// - in the 'development' mode ML case, we want to capture the training log
+/// during allocation (this is a log of features encountered and decisions
+/// made), and then measure a score, potentially a few steps after allocation
+/// completes. So we need the properties of an immutable pass to keep the logger
+/// state around until we can make that measurement.
+///
+/// Because we need to offer additional services in 'development' mode, the
+/// implementations of this analysis need to implement RTTI support.
+class RegAllocEvictionAdvisorAnalysis : public ImmutablePass {
+public:
+ enum class AdvisorMode : int { Default, Release, Development };
+
+ RegAllocEvictionAdvisorAnalysis(AdvisorMode Mode)
+ : ImmutablePass(ID), Mode(Mode){};
+ static char ID;
+
+ /// Get an advisor for the given context (i.e. machine function, etc)
+ virtual std::unique_ptr<RegAllocEvictionAdvisor>
+ getAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo) = 0;
+ AdvisorMode getAdvisorMode() const { return Mode; }
+
+private:
+ // This analysis preserves everything, and subclasses may have additional
+ // requirements.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ StringRef getPassName() const override;
+ const AdvisorMode Mode;
+};
+
+/// Specialization for the API used by the analysis infrastructure to create
+/// an instance of the eviction advisor.
+template <> Pass *callDefaultCtor<RegAllocEvictionAdvisorAnalysis>();
+
+// TODO(mtrofin): implement these.
+#ifdef LLVM_HAVE_TF_AOT
+RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor();
+#endif
+
+#ifdef LLVM_HAVE_TF_API
+RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
+#endif
+
+// TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation
+// out of RegAllocGreedy.cpp
+class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
+public:
+ DefaultEvictionAdvisor(const MachineFunction &MF, LiveRegMatrix *Matrix,
+ LiveIntervals *LIS, VirtRegMap *VRM,
+ const RegisterClassInfo &RegClassInfo,
+ ExtraRegInfo *ExtraInfo)
+ : RegAllocEvictionAdvisor(MF, Matrix, LIS, VRM, RegClassInfo, ExtraInfo) {
+ }
+
+private:
+ MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
+ uint8_t,
+ const SmallVirtRegSet &) const override;
+ bool canEvictHintInterference(LiveInterval &, MCRegister,
+ const SmallVirtRegSet &) const override;
+ bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
+ EvictionCost &,
+ const SmallVirtRegSet &) const;
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
+};
} // namespace llvm
#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 50411c177007..ce3cf31dbd6b 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -112,12 +112,6 @@ static cl::opt<bool> ExhaustiveSearch(
"and interference cutoffs of last chance recoloring"),
cl::Hidden);
-static cl::opt<bool> EnableLocalReassignment(
- "enable-local-reassign", cl::Hidden,
- cl::desc("Local reassignment can yield better allocation decisions, but "
- "may be compile time intensive"),
- cl::init(false));
-
static cl::opt<bool> EnableDeferredSpilling(
"enable-deferred-spilling", cl::Hidden,
cl::desc("Instead of spilling a variable right away, defer the actual "
@@ -172,8 +166,9 @@ class RAGreedy : public MachineFunctionPass,
// state
std::unique_ptr<Spiller> SpillerInstance;
PQueue Queue;
- unsigned NextCascade;
std::unique_ptr<VirtRegAuxInfo> VRAI;
+ Optional<ExtraRegInfo> ExtraInfo;
+ std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor;
// Enum CutOffStage to keep a track whether the register allocation failed
// because of the cutoffs encountered in last chance recoloring.
@@ -195,76 +190,6 @@ class RAGreedy : public MachineFunctionPass,
static const char *const StageName[];
#endif
- // RegInfo - Keep additional information about each live range.
- struct RegInfo {
- LiveRangeStage Stage = RS_New;
-
- // Cascade - Eviction loop prevention. See
- // canEvictInterferenceBasedOnCost().
- unsigned Cascade = 0;
-
- RegInfo() = default;
- };
-
- IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
-
- LiveRangeStage getStage(Register Reg) const {
- return ExtraRegInfo[Reg].Stage;
- }
-
- LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return getStage(VirtReg.reg());
- }
-
- void setStage(Register Reg, LiveRangeStage Stage) {
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[Reg].Stage = Stage;
- }
-
- void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
- setStage(VirtReg.reg(), Stage);
- }
-
- /// Return the current stage of the register, if present, otherwise initialize
- /// it and return that.
- LiveRangeStage getOrInitStage(Register Reg) {
- ExtraRegInfo.grow(Reg);
- return getStage(Reg);
- }
-
- unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; }
-
- void setCascade(Register Reg, unsigned Cascade) {
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[Reg].Cascade = Cascade;
- }
-
- unsigned getOrAssignNewCascade(Register Reg) {
- unsigned Cascade = getCascade(Reg);
- if (!Cascade) {
- Cascade = NextCascade++;
- setCascade(Reg, Cascade);
- }
- return Cascade;
- }
-
- unsigned getCascadeOrCurrentNext(Register Reg) const {
- unsigned Cascade = getCascade(Reg);
- if (!Cascade)
- Cascade = NextCascade;
- return Cascade;
- }
-
- template<typename Iterator>
- void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
- for (;Begin != End; ++Begin) {
- Register Reg = *Begin;
- if (ExtraRegInfo[Reg].Stage == RS_New)
- ExtraRegInfo[Reg].Stage = NewStage;
- }
- }
-
/// EvictionTrack - Keeps track of past evictions in order to optimize region
/// split decision.
class EvictionTrack {
@@ -375,10 +300,6 @@ class RAGreedy : public MachineFunctionPass,
/// Callee-save register cost, calculated once per machine function.
BlockFrequency CSRCost;
- /// Run or not the local reassignment heuristic. This information is
- /// obtained from the TargetSubtargetInfo.
- bool EnableLocalReassign;
-
/// Enable or not the consideration of the cost of local intervals created
/// by a split candidate when choosing the best split candidate.
bool EnableAdvancedRASplitCost;
@@ -447,13 +368,6 @@ private:
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
- Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
- bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
- bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
- EvictionCost &,
- const SmallVirtRegSet &) const;
- bool canEvictHintInterference(LiveInterval &, MCRegister,
- const SmallVirtRegSet &) const;
bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End, EvictionCost &MaxCost) const;
@@ -529,8 +443,6 @@ private:
BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
void collectHintInfo(Register, HintsInfo &);
- bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
-
/// Greedy RA statistic to remark.
struct RAGreedyStats {
unsigned Reloads = 0;
@@ -597,6 +509,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)
@@ -663,6 +576,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -696,22 +610,25 @@ void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
}
void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
+ ExtraInfo->LRE_DidCloneVirtReg(New, Old);
+}
+
+void ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
// Cloning a register we haven't even heard about yet? Just ignore it.
- if (!ExtraRegInfo.inBounds(Old))
+ if (!Info.inBounds(Old))
return;
// LRE may clone a virtual register because dead code elimination causes it to
// be split into connected components. The new components are much smaller
// than the original, so they should get a new chance at being assigned.
// same stage as the parent.
- ExtraRegInfo[Old].Stage = RS_Assign;
- ExtraRegInfo.grow(New);
- ExtraRegInfo[New] = ExtraRegInfo[Old];
+ Info[Old].Stage = RS_Assign;
+ Info.grow(New.id());
+ Info[New] = Info[Old];
}
void RAGreedy::releaseMemory() {
SpillerInstance.reset();
- ExtraRegInfo.clear();
GlobalCand.clear();
}
@@ -725,10 +642,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
unsigned Prio;
- auto Stage = getOrInitStage(Reg);
+ auto Stage = ExtraInfo->getOrInitStage(Reg);
if (Stage == RS_New) {
Stage = RS_Assign;
- setStage(Reg, Stage);
+ ExtraInfo->setStage(Reg, Stage);
}
if (Stage == RS_Split) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
@@ -824,7 +741,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
MCRegister PhysHint = Hint.asMCReg();
LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
- if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) {
+ if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
+ FixedRegisters)) {
evictInterference(VirtReg, PhysHint, NewVRegs);
return PhysHint;
}
@@ -850,7 +768,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const {
+Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
+ Register PrevReg) const {
auto Order =
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
MCRegister PhysReg;
@@ -889,9 +808,10 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const {
/// register.
/// @param B The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
-bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
- LiveInterval &B, bool BreaksHint) const {
- bool CanSplit = getStage(B) < RS_Spill;
+bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B,
+ bool BreaksHint) const {
+ bool CanSplit = ExtraInfo->getStage(B) < RS_Spill;
// Be fairly aggressive about following hints as long as the evictee can be
// split.
@@ -907,7 +827,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// canEvictHintInterference - return true if the interference for VirtReg
/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
-bool RAGreedy::canEvictHintInterference(
+bool DefaultEvictionAdvisor::canEvictHintInterference(
LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const {
EvictionCost MaxCost;
@@ -925,7 +845,7 @@ bool RAGreedy::canEvictHintInterference(
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterferenceBasedOnCost(
+bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
@@ -941,9 +861,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost(
//
// This works out so a register without a cascade number is allowed to evict
// anything, and it can be evicted by anything.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
- if (!Cascade)
- Cascade = NextCascade;
+ unsigned Cascade = ExtraInfo->getCascadeOrCurrentNext(VirtReg.reg());
EvictionCost Cost;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
@@ -965,7 +883,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost(
return false;
// Never evict spill products. They cannot split or spill.
- if (getStage(*Intf) == RS_Done)
+ if (ExtraInfo->getStage(*Intf) == RS_Done)
return false;
// Once a live range becomes small enough, it is urgent that we find a
// register for it. This is indicated by an infinite spill weight. These
@@ -980,7 +898,7 @@ bool RAGreedy::canEvictInterferenceBasedOnCost(
RegClassInfo.getNumAllocatableRegs(
MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
- unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade;
+ unsigned IntfCascade = ExtraInfo->getCascade(Intf->reg());
if (Cascade <= IntfCascade) {
if (!Urgent)
return false;
@@ -1043,7 +961,7 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
if (!Register::isVirtualRegister(Intf->reg()))
return false;
// Never evict spill products. They cannot split or spill.
- if (getStage(*Intf) == RS_Done)
+ if (ExtraInfo->getStage(*Intf) == RS_Done)
return false;
// Would this break a satisfied hint?
@@ -1106,7 +1024,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
- unsigned Cascade = getOrAssignNewCascade(VirtReg.reg());
+ unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
@@ -1132,10 +1050,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
Matrix->unassign(*Intf);
- assert((getCascade(Intf->reg()) < Cascade ||
+ assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
"Cannot decrease cascade number, illegal eviction");
- setCascade(Intf->reg(), Cascade);
+ ExtraInfo->setCascade(Intf->reg(), Cascade);
++NumEvicted;
NewVRegs.push_back(Intf->reg());
}
@@ -1143,7 +1061,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
-bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
+bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
if (!CSR)
return false;
@@ -1151,7 +1069,7 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
return !Matrix->isPhysRegUsed(PhysReg);
}
-MCRegister RAGreedy::tryFindEvictionCandidate(
+MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
// Keep track of the cheapest interference seen so far.
@@ -1225,8 +1143,8 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
TimePassesIsEnabled);
- MCRegister BestPhys =
- tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters);
+ MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
+ VirtReg, Order, CostPerUseLimit, FixedRegisters);
if (BestPhys.isValid())
evictInterference(VirtReg, BestPhys, NewVRegs);
return BestPhys;
@@ -1769,8 +1687,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// the ActiveBlocks list with each candidate. We need to filter out
// duplicates.
BitVector Todo = SA->getThroughBlocks();
- for (unsigned c = 0; c != UsedCands.size(); ++c) {
- ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+ for (unsigned UsedCand : UsedCands) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
for (unsigned Number : Blocks) {
if (!Todo.test(Number))
continue;
@@ -1817,13 +1735,13 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
// Ignore old intervals from DCE.
- if (getOrInitStage(Reg.reg()) != RS_New)
+ if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
continue;
// Remainder interval. Don't try splitting again, spill if it doesn't
// allocate.
if (IntvMap[I] == 0) {
- setStage(Reg, RS_Spill);
+ ExtraInfo->setStage(Reg, RS_Spill);
continue;
}
@@ -1834,7 +1752,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
<< " blocks as original.\n");
// Don't allow repeated splitting as a safe guard against looping.
- setStage(Reg, RS_Split2);
+ ExtraInfo->setStage(Reg, RS_Split2);
}
continue;
}
@@ -1899,7 +1817,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
unsigned BestCand = NoCand;
for (MCPhysReg PhysReg : Order) {
assert(PhysReg);
- if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
+ if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
continue;
// Discard bad candidates before we run out of interference cache cursors.
@@ -2065,8 +1983,8 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// goes straight to spilling, the new local ranges get to stay RS_New.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
- if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
- setStage(LI, RS_Spill);
+ if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
+ ExtraInfo->setStage(LI, RS_Spill);
}
if (VerifyEnabled)
@@ -2152,7 +2070,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
// Assign all new registers to RS_Spill. This was the last chance.
- setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
return 0;
}
@@ -2320,7 +2238,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// These rules allow a 3 -> 2+3 split once, which we need. They also prevent
// excessive splitting and infinite loops.
//
- bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
+ bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
// Best split candidate.
unsigned BestBefore = NumGaps;
@@ -2456,7 +2374,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
if (IntvMap[I] == 1) {
- setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
+ ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
}
LLVM_DEBUG(dbgs() << '\n');
@@ -2477,7 +2395,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
// Ranges must be Split2 or less.
- if (getStage(VirtReg) >= RS_Spill)
+ if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
return 0;
// Local intervals are handled separately.
@@ -2499,7 +2417,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// First try to split around a region spanning multiple blocks. RS_Split2
// ranges already made dubious progress with region splitting, so they go
// straight to single block splitting.
- if (getStage(VirtReg) < RS_Split2) {
+ if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
return PhysReg;
@@ -2551,7 +2469,7 @@ bool RAGreedy::mayRecolorAllInterferences(
// it would not be recolorable as it is in the same state as VirtReg.
// However, if VirtReg has tied defs and Intf doesn't, then
// there is still a point in examining if it can be recolorable.
- if (((getStage(*Intf) == RS_Done &&
+ if (((ExtraInfo->getStage(*Intf) == RS_Done &&
MRI->getRegClass(Intf->reg()) == CurRC) &&
!(hasTiedDef(MRI, VirtReg.reg()) &&
!hasTiedDef(MRI, Intf->reg()))) ||
@@ -2615,7 +2533,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
// Ranges must be Done.
- assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
+ assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
"Last chance recoloring should really be last chance");
// Set the max depth to LastChanceRecoloringMaxDepth.
// We may want to reconsider that if we end up with a too large search space
@@ -2806,7 +2724,7 @@ MCRegister
RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
MCRegister PhysReg, uint8_t &CostPerUseLimit,
SmallVectorImpl<Register> &NewVRegs) {
- if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
+ if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
SA->analyze(&VirtReg);
@@ -2818,7 +2736,7 @@ RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
CostPerUseLimit = 1;
return 0;
}
- if (getStage(VirtReg) < RS_Split) {
+ if (ExtraInfo->getStage(VirtReg) < RS_Split) {
// We choose pre-splitting over using the CSR for the first time if
// the cost of splitting is lower than CSRCost.
SA->analyze(&VirtReg);
@@ -3051,8 +2969,8 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
- if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
- NewVRegs.empty()) {
+ if (CSRCost.getFrequency() &&
+ EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
CostPerUseLimit, NewVRegs);
if (CSRReg || !NewVRegs.empty())
@@ -3063,9 +2981,9 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return PhysReg;
}
- LiveRangeStage Stage = getStage(VirtReg);
+ LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
- << getCascade(VirtReg.reg()) << '\n');
+ << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -3094,7 +3012,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Wait until the second time, when all smaller ranges have been allocated.
// This gives a better picture of the interference to split around.
if (Stage < RS_Split) {
- setStage(VirtReg, RS_Split);
+ ExtraInfo->setStage(VirtReg, RS_Split);
LLVM_DEBUG(dbgs() << "wait for second round\n");
NewVRegs.push_back(VirtReg.reg());
return 0;
@@ -3120,12 +3038,12 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Finally spill VirtReg itself.
if ((EnableDeferredSpilling ||
TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
- getStage(VirtReg) < RS_Memory) {
+ ExtraInfo->getStage(VirtReg) < RS_Memory) {
// TODO: This is experimental and in particular, we do not model
// the live range splitting done by spilling correctly.
// We would need a deep integration with the spiller to do the
// right thing here. Anyway, that is still good for early testing.
- setStage(VirtReg, RS_Memory);
+ ExtraInfo->setStage(VirtReg, RS_Memory);
LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
NewVRegs.push_back(VirtReg.reg());
} else {
@@ -3133,7 +3051,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
TimerGroupDescription, TimePassesIsEnabled);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+ ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
// Tell LiveDebugVariables about the new ranges. Ranges not being covered by
// the new regs are kept in LDV (still mapping to the old register), until
@@ -3316,10 +3234,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
RCI.runOnMachineFunction(mf);
- EnableLocalReassign = EnableLocalReassignment ||
- MF->getSubtarget().enableRALocalReassignment(
- MF->getTarget().getOptLevel());
-
EnableAdvancedRASplitCost =
ConsiderLocalIntervalCost.getNumOccurrences()
? ConsiderLocalIntervalCost
@@ -3354,8 +3268,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
- ExtraRegInfo.clear();
- NextCascade = 1;
+ ExtraInfo.emplace();
+ EvictAdvisor = getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(
+ *MF, Matrix, LIS, VRM, RegClassInfo, &*ExtraInfo);
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp
index b22eb080791e..93be8f689d57 100644
--- a/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -623,8 +623,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
// Compute an initial allowed set for the current vreg.
std::vector<MCRegister> VRegAllowed;
ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
- for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
- MCRegister PReg(RawPRegOrder[I]);
+ for (MCPhysReg R : RawPRegOrder) {
+ MCRegister PReg(R);
if (MRI.isReserved(PReg))
continue;
diff --git a/llvm/lib/CodeGen/RegAllocScore.cpp b/llvm/lib/CodeGen/RegAllocScore.cpp
new file mode 100644
index 000000000000..740890831617
--- /dev/null
+++ b/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -0,0 +1,124 @@
+//===- RegAllocScore.cpp - evaluate regalloc policy quality ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// Calculate a measure of the register allocation policy quality. This is used
+/// to construct a reward for the training of the ML-driven allocation policy.
+/// Currently, the score is the sum of the machine basic block frequency-weighed
+/// number of loads, stores, copies, and remat instructions, each factored with
+/// a relative weight.
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocScore.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <numeric>
+#include <vector>
+
+using namespace llvm;
+cl::opt<double> CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden);
+cl::opt<double> LoadWeight("regalloc-load-weight", cl::init(4.0), cl::Hidden);
+cl::opt<double> StoreWeight("regalloc-store-weight", cl::init(1.0), cl::Hidden);
+cl::opt<double> CheapRematWeight("regalloc-cheap-remat-weight", cl::init(0.2),
+ cl::Hidden);
+cl::opt<double> ExpensiveRematWeight("regalloc-expensive-remat-weight",
+ cl::init(1.0), cl::Hidden);
+#define DEBUG_TYPE "regalloc-score"
+
+RegAllocScore &RegAllocScore::operator+=(const RegAllocScore &Other) {
+ CopyCounts += Other.copyCounts();
+ LoadCounts += Other.loadCounts();
+ StoreCounts += Other.storeCounts();
+ LoadStoreCounts += Other.loadStoreCounts();
+ CheapRematCounts += Other.cheapRematCounts();
+ ExpensiveRematCounts += Other.expensiveRematCounts();
+ return *this;
+}
+
+bool RegAllocScore::operator==(const RegAllocScore &Other) const {
+ return copyCounts() == Other.copyCounts() &&
+ loadCounts() == Other.loadCounts() &&
+ storeCounts() == Other.storeCounts() &&
+ loadStoreCounts() == Other.loadStoreCounts() &&
+ cheapRematCounts() == Other.cheapRematCounts() &&
+ expensiveRematCounts() == Other.expensiveRematCounts();
+}
+
+bool RegAllocScore::operator!=(const RegAllocScore &Other) const {
+ return !(*this == Other);
+}
+
+double RegAllocScore::getScore() const {
+ double Ret = 0.0;
+ Ret += CopyWeight * copyCounts();
+ Ret += LoadWeight * loadCounts();
+ Ret += StoreWeight * storeCounts();
+ Ret += (LoadWeight + StoreWeight) * loadStoreCounts();
+ Ret += CheapRematWeight * cheapRematCounts();
+ Ret += ExpensiveRematWeight * expensiveRematCounts();
+
+ return Ret;
+}
+
+RegAllocScore
+llvm::calculateRegAllocScore(const MachineFunction &MF,
+ const MachineBlockFrequencyInfo &MBFI,
+ AAResults &AAResults) {
+ return calculateRegAllocScore(
+ MF,
+ [&](const MachineBasicBlock &MBB) {
+ return MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
+ },
+ [&](const MachineInstr &MI) {
+ return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable(
+ MI, &AAResults);
+ });
+}
+
+RegAllocScore llvm::calculateRegAllocScore(
+ const MachineFunction &MF,
+ llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq,
+ llvm::function_ref<bool(const MachineInstr &)>
+ IsTriviallyRematerializable) {
+ RegAllocScore Total;
+
+ for (const MachineBasicBlock &MBB : MF) {
+ double BlockFreqRelativeToEntrypoint = GetBBFreq(MBB);
+ RegAllocScore MBBScore;
+
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isDebugInstr() || MI.isKill() || MI.isInlineAsm()) {
+ continue;
+ }
+ if (MI.isCopy()) {
+ MBBScore.onCopy(BlockFreqRelativeToEntrypoint);
+ } else if (IsTriviallyRematerializable(MI)) {
+ if (MI.getDesc().isAsCheapAsAMove()) {
+ MBBScore.onCheapRemat(BlockFreqRelativeToEntrypoint);
+ } else {
+ MBBScore.onExpensiveRemat(BlockFreqRelativeToEntrypoint);
+ }
+ } else if (MI.mayLoad() && MI.mayStore()) {
+ MBBScore.onLoadStore(BlockFreqRelativeToEntrypoint);
+ } else if (MI.mayLoad()) {
+ MBBScore.onLoad(BlockFreqRelativeToEntrypoint);
+ } else if (MI.mayStore()) {
+ MBBScore.onStore(BlockFreqRelativeToEntrypoint);
+ }
+ }
+ Total += MBBScore;
+ }
+ return Total;
+}
diff --git a/llvm/lib/CodeGen/RegAllocScore.h b/llvm/lib/CodeGen/RegAllocScore.h
new file mode 100644
index 000000000000..3c28bb61189d
--- /dev/null
+++ b/llvm/lib/CodeGen/RegAllocScore.h
@@ -0,0 +1,80 @@
+//==- RegAllocScore.h - evaluate regalloc policy quality ----------*-C++-*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// Calculate a measure of the register allocation policy quality. This is used
+/// to construct a reward for the training of the ML-driven allocation policy.
+/// Currently, the score is the sum of the machine basic block frequency-weighed
+/// number of loads, stores, copies, and remat instructions, each factored with
+/// a relative weight.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCSCORE_H_
+#define LLVM_CODEGEN_REGALLOCSCORE_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Module.h"
+#include <cassert>
+#include <cstdint>
+#include <limits>
+
+namespace llvm {
+
+/// Regalloc score.
+class RegAllocScore final {
+ double CopyCounts = 0.0;
+ double LoadCounts = 0.0;
+ double StoreCounts = 0.0;
+ double CheapRematCounts = 0.0;
+ double LoadStoreCounts = 0.0;
+ double ExpensiveRematCounts = 0.0;
+
+public:
+ RegAllocScore() = default;
+ RegAllocScore(const RegAllocScore &) = default;
+
+ double copyCounts() const { return CopyCounts; }
+ double loadCounts() const { return LoadCounts; }
+ double storeCounts() const { return StoreCounts; }
+ double loadStoreCounts() const { return LoadStoreCounts; }
+ double expensiveRematCounts() const { return ExpensiveRematCounts; }
+ double cheapRematCounts() const { return CheapRematCounts; }
+
+ void onCopy(double Freq) { CopyCounts += Freq; }
+ void onLoad(double Freq) { LoadCounts += Freq; }
+ void onStore(double Freq) { StoreCounts += Freq; }
+ void onLoadStore(double Freq) { LoadStoreCounts += Freq; }
+ void onExpensiveRemat(double Freq) { ExpensiveRematCounts += Freq; }
+ void onCheapRemat(double Freq) { CheapRematCounts += Freq; }
+
+ RegAllocScore &operator+=(const RegAllocScore &Other);
+ bool operator==(const RegAllocScore &Other) const;
+ bool operator!=(const RegAllocScore &Other) const;
+ double getScore() const;
+};
+
+/// Calculate a score. When comparing 2 scores for the same function but
+/// different policies, the better policy would have a smaller score.
+/// The implementation is the overload below (which is also easily unittestable)
+RegAllocScore calculateRegAllocScore(const MachineFunction &MF,
+ const MachineBlockFrequencyInfo &MBFI,
+ AAResults &AAResults);
+
+/// Implementation of the above, which is also more easily unittestable.
+RegAllocScore calculateRegAllocScore(
+ const MachineFunction &MF,
+ llvm::function_ref<double(const MachineBasicBlock &)> GetBBFreq,
+ llvm::function_ref<bool(const MachineInstr &)> IsTriviallyRematerializable);
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCSCORE_H_
diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 797899fb5b86..65a65b9cae95 100644
--- a/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -109,8 +109,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
- for (unsigned i = 0; i != RawOrder.size(); ++i) {
- unsigned PhysReg = RawOrder[i];
+ for (unsigned PhysReg : RawOrder) {
// Remove reserved registers from the allocation order.
if (Reserved.test(PhysReg))
continue;
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 4c8534cf2d01..a917b0d27d4a 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -4067,13 +4067,13 @@ void RegisterCoalescer::joinAllIntervals() {
// Coalesce intervals in MBB priority order.
unsigned CurrDepth = std::numeric_limits<unsigned>::max();
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ for (MBBPriorityInfo &MBB : MBBs) {
// Try coalescing the collected local copies for deeper loops.
- if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
coalesceLocals();
- CurrDepth = MBBs[i].Depth;
+ CurrDepth = MBB.Depth;
}
- copyCoalesceInMBB(MBBs[i].MBB);
+ copyCoalesceInMBB(MBB.MBB);
}
lateLiveIntervalUpdate();
coalesceLocals();
diff --git a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
index de6129a912d3..49859aeec78b 100644
--- a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
+++ b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -159,20 +159,17 @@ static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 8> DbgValsToBeRemoved;
SmallDenseSet<DebugVariable> VariableSet;
- for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
- I != E; ++I) {
- MachineInstr *MI = &*I;
-
- if (MI->isDebugValue()) {
- DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
- MI->getDebugLoc()->getInlinedAt());
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.isDebugValue()) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
auto R = VariableSet.insert(Var);
// If it is a DBG_VALUE describing a constant as:
// DBG_VALUE 0, ...
// we just don't consider such instructions as candidates
// for redundant removal.
- if (MI->isNonListDebugValue()) {
- MachineOperand &Loc = MI->getDebugOperand(0);
+ if (MI.isNonListDebugValue()) {
+ MachineOperand &Loc = MI.getDebugOperand(0);
if (!Loc.isReg()) {
// If we have already encountered this variable, just stop
// tracking it.
@@ -185,7 +182,7 @@ static bool reduceDbgValsBackwardScan(MachineBasicBlock &MBB) {
// We have already encountered the value for this variable,
// so this one can be deleted.
if (!R.second)
- DbgValsToBeRemoved.push_back(MI);
+ DbgValsToBeRemoved.push_back(&MI);
continue;
}
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 50d9d64bfcfd..3d8a7eecce18 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -521,8 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
StackLayout SSL(StackAlignment);
if (StackGuardSlot) {
Type *Ty = StackGuardSlot->getAllocatedType();
- uint64_t Align =
- std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
+ Align Align = std::max(DL.getPrefTypeAlign(Ty), StackGuardSlot->getAlign());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getFullLiveRange());
}
@@ -534,8 +533,9 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- uint64_t Align =
- std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment());
+ Align Align = DL.getPrefTypeAlign(Ty);
+ if (auto A = Arg->getParamAlign())
+ Align = std::max(Align, *A);
SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
}
@@ -546,24 +546,24 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment());
+ Align Align = std::max(DL.getPrefTypeAlign(Ty), AI->getAlign());
SSL.addObject(AI, Size, Align,
ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);
}
SSL.computeLayout();
- uint64_t FrameAlignment = SSL.getFrameAlignment();
+ Align FrameAlignment = SSL.getFrameAlignment();
// FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location
// (AlignmentSkew).
if (FrameAlignment > StackAlignment) {
// Re-align the base pointer according to the max requested alignment.
- assert(isPowerOf2_64(FrameAlignment));
IRB.SetInsertPoint(BasePointer->getNextNode());
BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
- IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
- ConstantInt::get(IntPtrTy, ~uint64_t(FrameAlignment - 1))),
+ IRB.CreateAnd(
+ IRB.CreatePtrToInt(BasePointer, IntPtrTy),
+ ConstantInt::get(IntPtrTy, ~(FrameAlignment.value() - 1))),
StackPtrTy));
}
diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp
index 7cdda7743c16..602afcfa9001 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
}
}
-void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,
+void StackLayout::addObject(const Value *V, unsigned Size, Align Alignment,
const StackLifetime::LiveRange &Range) {
StackObjects.push_back({V, Size, Alignment, Range});
ObjectAlignments[V] = Alignment;
@@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,
}
static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
- uint64_t Alignment) {
+ Align Alignment) {
return alignTo(Offset + Size, Alignment) - Size;
}
@@ -62,7 +62,8 @@ void StackLayout::layoutObject(StackObject &Obj) {
}
LLVM_DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align "
- << Obj.Alignment << ", range " << Obj.Range << "\n");
+ << Obj.Alignment.value() << ", range " << Obj.Range
+ << "\n");
assert(Obj.Alignment <= MaxAlignment);
unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment);
unsigned End = Start + Obj.Size;
diff --git a/llvm/lib/CodeGen/SafeStackLayout.h b/llvm/lib/CodeGen/SafeStackLayout.h
index b72450e57080..4ac7af2059f5 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/llvm/lib/CodeGen/SafeStackLayout.h
@@ -22,7 +22,7 @@ namespace safestack {
/// Compute the layout of an unsafe stack frame.
class StackLayout {
- uint64_t MaxAlignment;
+ Align MaxAlignment;
struct StackRegion {
unsigned Start;
@@ -40,14 +40,14 @@ class StackLayout {
struct StackObject {
const Value *Handle;
unsigned Size;
- uint64_t Alignment;
+ Align Alignment;
StackLifetime::LiveRange Range;
};
SmallVector<StackObject, 8> StackObjects;
DenseMap<const Value *, unsigned> ObjectOffsets;
- DenseMap<const Value *, uint64_t> ObjectAlignments;
+ DenseMap<const Value *, Align> ObjectAlignments;
void layoutObject(StackObject &Obj);
@@ -56,7 +56,7 @@ public:
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
- void addObject(const Value *V, unsigned Size, uint64_t Alignment,
+ void addObject(const Value *V, unsigned Size, Align Alignment,
const StackLifetime::LiveRange &Range);
/// Run the layout computation for all previously added objects.
@@ -66,13 +66,13 @@ public:
unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
/// Returns the alignment of the object
- uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
+ Align getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
/// Returns the size of the entire frame.
unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
/// Returns the alignment of the frame.
- uint64_t getFrameAlignment() { return MaxAlignment; }
+ Align getFrameAlignment() { return MaxAlignment; }
void print(raw_ostream &OS);
};
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index ef3afab2b730..696b29018ae6 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -618,8 +618,8 @@ std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
do {
const SUnit *SU = WorkList.back();
WorkList.pop_back();
- for (int I = SU->Succs.size()-1; I >= 0; --I) {
- const SUnit *Succ = SU->Succs[I].getSUnit();
+ for (const SDep &SD : llvm::reverse(SU->Succs)) {
+ const SUnit *Succ = SD.getSUnit();
unsigned s = Succ->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (Succ->isBoundaryNode())
@@ -652,8 +652,8 @@ std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
do {
const SUnit *SU = WorkList.back();
WorkList.pop_back();
- for (int I = SU->Preds.size()-1; I >= 0; --I) {
- const SUnit *Pred = SU->Preds[I].getSUnit();
+ for (const SDep &SD : llvm::reverse(SU->Preds)) {
+ const SUnit *Pred = SD.getSUnit();
unsigned s = Pred->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
if (Pred->isBoundaryNode())
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index df5a041b87cd..067ad819e0d2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -487,10 +487,7 @@ namespace {
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
- SDValue visitFMINNUM(SDNode *N);
- SDValue visitFMAXNUM(SDNode *N);
- SDValue visitFMINIMUM(SDNode *N);
- SDValue visitFMAXIMUM(SDNode *N);
+ SDValue visitFMinMax(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -1701,10 +1698,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
- case ISD::FMINNUM: return visitFMINNUM(N);
- case ISD::FMAXNUM: return visitFMAXNUM(N);
- case ISD::FMINIMUM: return visitFMINIMUM(N);
- case ISD::FMAXIMUM: return visitFMAXIMUM(N);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: return visitFMinMax(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
@@ -2260,6 +2257,21 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ // fold (add x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+
+ // fold (add c1, c2) -> c1+c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
@@ -2268,23 +2280,6 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold (add x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
- }
-
- // fold (add x, undef) -> undef
- if (N0.isUndef())
- return N0;
-
- if (N1.isUndef())
- return N1;
-
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- // canonicalize constant to RHS
- if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
- // fold (add c1, c2) -> c1+c2
- return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
}
// fold (add x, 0) -> x
@@ -2554,6 +2549,19 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ // fold (add_sat x, undef) -> -1
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getAllOnesConstant(DL, VT);
+
+ // fold (add_sat c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
// TODO SimplifyVBinOp
@@ -2561,20 +2569,6 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
// fold (add_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
- }
-
- // fold (add_sat x, undef) -> -1
- if (N0.isUndef() || N1.isUndef())
- return DAG.getAllOnesConstant(DL, VT);
-
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- // canonicalize constant to RHS
- if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(Opcode, DL, VT, N1, N0);
- // fold (add_sat c1, c2) -> c3
- return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
}
// fold (add_sat x, 0) -> x
@@ -3260,6 +3254,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // fold (sub c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
@@ -3270,15 +3273,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0;
}
- // fold (sub x, x) -> 0
- // FIXME: Refactor this and xor and other similar operations together.
- if (N0 == N1)
- return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
-
- // fold (sub c1, c2) -> c3
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -3611,15 +3605,6 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
- // fold vector ops
- if (VT.isVector()) {
- // TODO SimplifyVBinOp
-
- // fold (sub_sat x, 0) -> x, vector edition
- if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
- return N0;
- }
-
// fold (sub_sat x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -3632,6 +3617,15 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
return C;
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (sub_sat x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
// fold (sub_sat x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -3781,6 +3775,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
APInt ConstValue1;
@@ -3802,15 +3805,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
}
- // fold (mul c1, c2) -> c1*c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
-
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isZero())
return N1;
@@ -4140,17 +4134,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
EVT CCVT = getSetCCResultType(VT);
SDLoc DL(N);
+ // fold (sdiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (sdiv c1, c2) -> c1/c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
- return C;
-
// fold (sdiv X, -1) -> 0-X
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N1C->isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
@@ -4284,17 +4278,17 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
EVT CCVT = getSetCCResultType(VT);
SDLoc DL(N);
+ // fold (udiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (udiv c1, c2) -> c1/c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
- return C;
-
// fold (udiv X, -1) -> select(X == -1, 1, 0)
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
@@ -4463,6 +4457,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // fold (mulhs c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
@@ -4474,15 +4477,6 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
return DAG.getConstant(0, DL, VT);
}
- // fold (mulhs c1, c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
-
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4523,6 +4517,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // fold (mulhu c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
@@ -4534,15 +4537,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return DAG.getConstant(0, DL, VT);
}
- // fold (mulhu c1, c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
-
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4786,12 +4780,14 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
}
// Function to calculate whether the Min/Max pair of SDNodes (potentially
-// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1)
-// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp
-// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The
-// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3
+// swapped around) make a signed saturate pattern, clamping to between a signed
+// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
+// Returns the node being clamped and the bitwidth of the clamp in BW. Should
+// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
+// same as SimplifySelectCC. N0<N1 ? N2 : N3.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
- SDValue N3, ISD::CondCode CC, unsigned &BW) {
+ SDValue N3, ISD::CondCode CC, unsigned &BW,
+ bool &Unsigned) {
auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// The compare and select operand should be the same or the select operands
@@ -4858,17 +4854,27 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
const APInt &MinC = MinCOp->getAPIntValue();
const APInt &MaxC = MaxCOp->getAPIntValue();
APInt MinCPlus1 = MinC + 1;
- if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2())
- return SDValue();
- BW = MinCPlus1.exactLogBase2() + 1;
- return N02;
+ if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2() + 1;
+ Unsigned = false;
+ return N02;
+ }
+
+ if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
+ BW = MinCPlus1.exactLogBase2();
+ Unsigned = true;
+ return N02;
+ }
+
+ return SDValue();
}
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
SelectionDAG &DAG) {
unsigned BW;
- SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW);
+ bool Unsigned;
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
return SDValue();
EVT FPVT = Fp.getOperand(0).getValueType();
@@ -4876,13 +4882,14 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
if (FPVT.isVector())
NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
FPVT.getVectorElementCount());
- if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(
- ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT))
+ unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
return SDValue();
SDLoc DL(Fp);
- SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0),
+ SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
DAG.getValueType(NewVT.getScalarType()));
- return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+ return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
+ : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
}
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
@@ -4892,11 +4899,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDLoc DL(N);
- // fold vector ops
- if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
-
// fold operation with constant operands.
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
@@ -4904,7 +4906,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
// Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
@@ -5777,6 +5784,15 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0 == N1)
return N0;
+ // fold (and c1, c2) -> c1&c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
@@ -5824,22 +5840,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- // fold (and c1, c2) -> c1&c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
-
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
@@ -6546,21 +6553,25 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N0 == N1)
return N0;
+ // fold (or c1, c2) -> c1|c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
// fold (or x, -1) -> -1, vector edition
- if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
- // do not return N0, because undef node may exist in N0
- return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
@@ -6629,16 +6640,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
- // fold (or c1, c2) -> c1|c2
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
- return C;
-
- // canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
-
// fold (or x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -6651,6 +6652,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return NewSel;
// fold (or x, c) -> c iff (x & ~c) == 0
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
@@ -7941,18 +7943,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
- // fold vector ops
- if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
-
- // fold (xor x, 0) -> x, vector edition
- if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
- return N1;
- if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
- return N0;
- }
-
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -7969,9 +7959,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -8409,6 +8409,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ // fold (shl c1, c2) -> c1<<c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
@@ -8434,12 +8438,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
- // fold (shl c1, c2) -> c1<<c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -8558,6 +8556,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
// TODO - support non-uniform vector shift amounts.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
@@ -8758,6 +8757,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ // fold (sra c1, c2) -> (sra c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// Arithmetic shifting an all-sign-bit value is a no-op.
// fold (sra 0, x) -> 0
// fold (sra -1, x) -> -1
@@ -8769,17 +8772,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
- // fold (sra c1, c2) -> (sra c1, c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
@@ -8962,21 +8960,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
- // fold (srl c1, c2) -> c1 >>u c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
- return C;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// if (srl x, c) is known to be zero, return 0
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C &&
DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
@@ -10043,6 +10040,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Chain = MST->getChain();
+ SDValue Value = MST->getValue();
+ SDValue Ptr = MST->getBasePtr();
SDLoc DL(N);
// Zap masked stores with a zero mask.
@@ -10054,12 +10053,50 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
!MST->isCompressingStore() && !MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
- MST->getBasePtr(), MST->getMemOperand());
+ MST->getBasePtr(), MST->getPointerInfo(),
+ MST->getOriginalAlign(), MachineMemOperand::MOStore,
+ MST->getAAInfo());
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
+ if (MST->isTruncatingStore() && MST->isUnindexed() &&
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ MST->getMemoryVT().getScalarSizeInBits());
+
+ // See if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+ // with another node (N is deleted) SimplifyDemandedBits will add Value's
+ // node back to the worklist if necessary, but we also need to re-visit
+ // the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
+ // If this is a TRUNC followed by a masked store, fold this into a masked
+ // truncating store. We can do this even if this is already a masked
+ // truncstore.
+ if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
+ MST->isUnindexed() &&
+ TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
+ MST->getMemoryVT(), LegalOperations)) {
+ auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
+ Value.getOperand(0).getValueType());
+ return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ MST->getOffset(), Mask, MST->getMemoryVT(),
+ MST->getMemOperand(), MST->getAddressingMode(),
+ /*IsTruncating=*/true);
+ }
+
return SDValue();
}
@@ -10109,8 +10146,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// FIXME: Can we do this for indexed, expanding, or extending loads?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
!MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
- SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
- MLD->getBasePtr(), MLD->getMemOperand());
+ SDValue NewLd = DAG.getLoad(
+ N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
+ MLD->getPointerInfo(), MLD->getOriginalAlign(),
+ MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
return CombineTo(N, NewLd, NewLd.getValue(1));
}
@@ -13876,19 +13915,19 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
- // fold vector ops
- if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
-
// fold (fadd c1, c2) -> c1 + c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
@@ -14084,15 +14123,15 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
+ // fold (fsub c1, c2) -> c1-c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (fsub c1, c2) -> c1-c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14157,7 +14196,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
@@ -14168,22 +14206,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
- // fold vector ops
- if (VT.isVector()) {
- // This just handles C1 * C2 for vectors. Other vector folds are below.
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
- }
-
// fold (fmul c1, c2) -> c1*c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14495,8 +14531,6 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -14506,15 +14540,15 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
+ // fold (fdiv c1, c2) -> c1/c2
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- // fold (fdiv c1, c2) -> c1/c2
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14523,7 +14557,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (N1CFP) {
+ if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
// Compute the reciprocal 1.0 / c2.
const APFloat &N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
@@ -14639,8 +14673,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -14649,9 +14681,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
- if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
-
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
+ return C;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -14712,12 +14744,12 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
- if (N0CFP && N1CFP) // Constant fold
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
+ // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
+ return C;
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
const APFloat &V = N1C->getValueAPF();
@@ -14835,14 +14867,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
- // This optimization is guarded by a function attribute because it may produce
- // unexpected results. Ie, programs may be relying on the platform-specific
- // undefined behavior when the float-to-int conversion overflows.
- const Function &F = DAG.getMachineFunction().getFunction();
- Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
- if (StrictOverflow.getValueAsString().equals("false"))
- return SDValue();
-
// We only do this if the target has legal ftrunc. Otherwise, we'd likely be
// replacing casts with a libcall. We also must be allowed to ignore -0.0
// because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
@@ -15216,31 +15240,26 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
-static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
- APFloat (*Op)(const APFloat &, const APFloat &)) {
+SDValue DAGCombiner::visitFMinMax(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
const SDNodeFlags Flags = N->getFlags();
unsigned Opc = N->getOpcode();
bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
- if (N0CFP && N1CFP) {
- const APFloat &C0 = N0CFP->getValueAPF();
- const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
- }
+ // Constant fold.
+ if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
+ return C;
// Canonicalize to constant on RHS.
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
- if (N1CFP) {
+ if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
const APFloat &AF = N1CFP->getValueAPF();
// minnum(X, nan) -> X
@@ -15272,22 +15291,6 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
return SDValue();
}
-SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
- return visitFMinMax(DAG, N, minnum);
-}
-
-SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
- return visitFMinMax(DAG, N, maxnum);
-}
-
-SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
- return visitFMinMax(DAG, N, minimum);
-}
-
-SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
- return visitFMinMax(DAG, N, maximum);
-}
-
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -18392,8 +18395,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
- if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
- ST->getMemoryVT() == ST1->getMemoryVT() &&
+ if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
+ ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If this is a store followed by a store with the same value to the
// same location, then the store is dead/noop.
@@ -20727,6 +20730,156 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return NewLd;
}
+/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
+/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
+/// EXTRACT_SUBVECTOR(Op?, ?),
+/// Mask'))
+/// iff it is legal and profitable to do so. Notably, the trimmed mask
+/// (containing only the elements that are extracted)
+/// must reference at most two subvectors.
+static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ "Must only be called on EXTRACT_SUBVECTOR's");
+
+ SDValue N0 = N->getOperand(0);
+
+ // Only deal with non-scalable vectors.
+ EVT NarrowVT = N->getValueType(0);
+ EVT WideVT = N0.getValueType();
+ if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
+ return SDValue();
+
+ // The operand must be a shufflevector.
+ auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
+ if (!WideShuffleVector)
+ return SDValue();
+
+  // The old shuffle needs to go away.
+ if (!WideShuffleVector->hasOneUse())
+ return SDValue();
+
+ // And the narrow shufflevector that we'll form must be legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
+ return SDValue();
+
+ uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
+ int NumEltsExtracted = NarrowVT.getVectorNumElements();
+ assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
+ "Extract index is not a multiple of the output vector length.");
+
+ int WideNumElts = WideVT.getVectorNumElements();
+
+ SmallVector<int, 16> NewMask;
+ NewMask.reserve(NumEltsExtracted);
+ SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
+ DemandedSubvectors;
+
+ // Try to decode the wide mask into narrow mask from at most two subvectors.
+ for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
+ NumEltsExtracted)) {
+ assert((M >= -1) && (M < (2 * WideNumElts)) &&
+ "Out-of-bounds shuffle mask?");
+
+ if (M < 0) {
+ // Does not depend on operands, does not require adjustment.
+ NewMask.emplace_back(M);
+ continue;
+ }
+
+ // From which operand of the shuffle does this shuffle mask element pick?
+ int WideShufOpIdx = M / WideNumElts;
+ // Which element of that operand is picked?
+ int OpEltIdx = M % WideNumElts;
+
+ assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
+ "Shuffle mask vector decomposition failure.");
+
+ // And which NumEltsExtracted-sized subvector of that operand is that?
+ int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
+ // And which element within that subvector of that operand is that?
+ int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
+
+ assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
+ "Shuffle mask subvector decomposition failure.");
+
+ assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
+ WideShufOpIdx * WideNumElts) == M &&
+ "Shuffle mask full decomposition failure.");
+
+ SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
+
+ if (Op.isUndef()) {
+ // Picking from an undef operand. Let's adjust mask instead.
+ NewMask.emplace_back(-1);
+ continue;
+ }
+
+ // Profitability check: only deal with extractions from the first subvector.
+ if (OpSubvecIdx != 0)
+ return SDValue();
+
+ const std::pair<SDValue, int> DemandedSubvector =
+ std::make_pair(Op, OpSubvecIdx);
+
+ if (DemandedSubvectors.insert(DemandedSubvector)) {
+ if (DemandedSubvectors.size() > 2)
+ return SDValue(); // We can't handle more than two subvectors.
+ // How many elements into the WideVT does this subvector start?
+ int Index = NumEltsExtracted * OpSubvecIdx;
+ // Bail out if the extraction isn't going to be cheap.
+ if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
+ return SDValue();
+ }
+
+ // Ok, but from which operand of the new shuffle will this element pick?
+ int NewOpIdx =
+ getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
+ assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
+
+ int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
+ NewMask.emplace_back(AdjM);
+ }
+ assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
+ assert(DemandedSubvectors.size() <= 2 &&
+ "Should have ended up demanding at most two subvectors.");
+
+ // Did we discover that the shuffle does not actually depend on operands?
+ if (DemandedSubvectors.empty())
+ return DAG.getUNDEF(NarrowVT);
+
+ // We still perform the exact same EXTRACT_SUBVECTOR, just on different
+  // operand[s]/index[es], so there is no point in checking for its legality.
+
+ // Do not turn a legal shuffle into an illegal one.
+ if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
+ !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ SmallVector<SDValue, 2> NewOps;
+ for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
+ &DemandedSubvector : DemandedSubvectors) {
+ // How many elements into the WideVT does this subvector start?
+ int Index = NumEltsExtracted * DemandedSubvector.second;
+ SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
+ NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
+ DemandedSubvector.first, IndexC));
+ }
+ assert((NewOps.size() == 1 || NewOps.size() == 2) &&
+ "Should end up with either one or two ops");
+
+ // If we ended up with only one operand, pad with an undef.
+ if (NewOps.size() == 1)
+ NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
+
+ return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
+}
+
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
@@ -20840,6 +20993,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
}
+ if (SDValue V =
+ foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
+ return V;
+
V = peekThroughBitcasts(V);
// If the input is a build vector. Try to make a smaller build vector.
@@ -22424,15 +22581,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDValue Ops[] = {LHS, RHS};
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
- // See if we can constant fold the vector operation.
- if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),
- LHS.getValueType(), Ops))
- return Fold;
-
// Move unary shuffles with identical masks after a vector binop:
// VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
// --> shuffle (VBinOp A, B), Undef, Mask
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 08598eeded7a..5dfb65ef131a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3367,13 +3367,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSHL:
case ISD::FSHR:
- if (TLI.expandFunnelShift(Node, Tmp1, DAG))
- Results.push_back(Tmp1);
+ if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG))
+ Results.push_back(Expanded);
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, true /*AllowVectorOps*/, Tmp1, DAG))
- Results.push_back(Tmp1);
+ if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG))
+ Results.push_back(Expanded);
break;
case ISD::SADDSAT:
case ISD::UADDSAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 1fa4d88fcb4a..518e525e13d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1277,8 +1277,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
// Lower the rotate to shifts and ORs which can be promoted.
- SDValue Res;
- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ SDValue Res = TLI.expandROT(N, true /*AllowVectorOps*/, DAG);
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
}
@@ -1286,7 +1285,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
- SDValue Amount = GetPromotedInteger(N->getOperand(2));
+ SDValue Amt = GetPromotedInteger(N->getOperand(2));
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
@@ -1297,21 +1296,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
unsigned NewBits = VT.getScalarSizeInBits();
// Amount has to be interpreted modulo the old bit width.
- Amount =
- DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
+ Amt = DAG.getNode(ISD::UREM, DL, VT, Amt, DAG.getConstant(OldBits, DL, VT));
// If the promoted type is twice the size (or more), then we use the
// traditional funnel 'double' shift codegen. This isn't necessary if the
// shift amount is constant.
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
- if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) &&
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
- Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
+ Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amt);
if (!IsFSHR)
Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
return Res;
@@ -1324,9 +1322,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
// Increase Amount to shift the result into the lower bits of the promoted
// type.
if (IsFSHR)
- Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
+ Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, ShiftOffset);
- return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 98312f91d8c0..03dcd0f6d2c9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -83,7 +83,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
SDValue Res(&Node, i);
bool Failed = false;
// Don't create a value in map.
- auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
+ auto ResId = ValueToIdMap.lookup(Res);
unsigned Mapped = 0;
if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {
@@ -301,7 +301,7 @@ ScanOperands:
if (IgnoreNodeResults(N->getOperand(i).getNode()))
continue;
- const auto Op = N->getOperand(i);
+ const auto &Op = N->getOperand(i);
LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
EVT OpVT = Op.getValueType();
switch (getTypeAction(OpVT)) {
@@ -1007,11 +1007,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
///
/// ValVT is the type of values that produced the boolean.
SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
- SDLoc dl(Bool);
- EVT BoolVT = getSetCCResultType(ValVT);
- ISD::NodeType ExtendCode =
- TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
- return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
+ return TLI.promoteTargetBoolean(DAG, Bool, ValVT);
}
/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 88a28a3be53e..1493f36fcd3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -254,69 +254,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
- if (Op.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(Node);
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
- LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
- Node->dump(&DAG));
- switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
- LD->getMemoryVT())) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Node);
- case TargetLowering::Custom: {
- SmallVector<SDValue, 2> ResultVals;
- if (LowerOperationWrapper(Node, ResultVals)) {
- if (ResultVals.empty())
- return TranslateLegalizeResults(Op, Node);
-
- Changed = true;
- return RecursivelyLegalizeResults(Op, ResultVals);
- }
- LLVM_FALLTHROUGH;
- }
- case TargetLowering::Expand: {
- Changed = true;
- std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
- AddLegalizedOperand(Op.getValue(0), Tmp.first);
- AddLegalizedOperand(Op.getValue(1), Tmp.second);
- return Op.getResNo() ? Tmp.first : Tmp.second;
- }
- }
- }
- } else if (Op.getOpcode() == ISD::STORE) {
- StoreSDNode *ST = cast<StoreSDNode>(Node);
- EVT StVT = ST->getMemoryVT();
- MVT ValVT = ST->getValue().getSimpleValueType();
- if (StVT.isVector() && ST->isTruncatingStore()) {
- LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
- Node->dump(&DAG));
- switch (TLI.getTruncStoreAction(ValVT, StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Node);
- case TargetLowering::Custom: {
- SmallVector<SDValue, 1> ResultVals;
- if (LowerOperationWrapper(Node, ResultVals)) {
- if (ResultVals.empty())
- return TranslateLegalizeResults(Op, Node);
-
- Changed = true;
- return RecursivelyLegalizeResults(Op, ResultVals);
- }
- LLVM_FALLTHROUGH;
- }
- case TargetLowering::Expand: {
- Changed = true;
- SDValue Chain = ExpandStore(Node);
- AddLegalizedOperand(Op, Chain);
- return Chain;
- }
- }
- }
- }
-
bool HasVectorValueOrOp =
llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
llvm::any_of(Node->op_values(),
@@ -329,6 +266,22 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Node);
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ EVT LoadedVT = LD->getMemoryVT();
+ if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
+ Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ EVT StVT = ST->getMemoryVT();
+ MVT ValVT = ST->getValue().getSimpleValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ Action = TLI.getTruncStoreAction(ValVT, StVT);
+ break;
+ }
case ISD::MERGE_VALUES:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
// This operation lies about being legal: when it claims to be legal,
@@ -512,6 +465,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (Action) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
+ assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
+ "This action is not supported yet!");
LLVM_DEBUG(dbgs() << "Promoting\n");
Promote(Node, ResultVals);
assert(!ResultVals.empty() && "No results for promotion?");
@@ -731,8 +686,16 @@ SDValue VectorLegalizer::ExpandStore(SDNode *N) {
}
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
- SDValue Tmp;
switch (Node->getOpcode()) {
+ case ISD::LOAD: {
+ std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ return;
+ }
+ case ISD::STORE:
+ Results.push_back(ExpandStore(Node));
+ return;
case ISD::MERGE_VALUES:
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
Results.push_back(Node->getOperand(i));
@@ -804,15 +767,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::FSHL:
case ISD::FSHR:
- if (TLI.expandFunnelShift(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 2695ed36991c..3d5c4c5b1cae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -168,10 +168,9 @@ void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
NumNodesSolelyBlocking.resize(SUnits->size(), 0);
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
- SUnit *SU = &(*SUnits)[i];
- initNumRegDefsLeft(SU);
- SU->NodeQueueId = 0;
+ for (SUnit &SU : *SUnits) {
+ initNumRegDefsLeft(&SU);
+ SU.NodeQueueId = 0;
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 84e6d2a16422..aec2cf38b400 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -442,33 +442,32 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
- SUnit *SU = &SUnits[su];
- SDNode *MainNode = SU->getNode();
+ for (SUnit &SU : SUnits) {
+ SDNode *MainNode = SU.getNode();
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
const MCInstrDesc &MCID = TII->get(Opc);
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
- SU->isTwoAddress = true;
+ SU.isTwoAddress = true;
break;
}
}
if (MCID.isCommutable())
- SU->isCommutable = true;
+ SU.isCommutable = true;
}
// Find all predecessors and successors of the group.
- for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
- SU->hasPhysRegClobbers = true;
+ SU.hasPhysRegClobbers = true;
unsigned NumUsed = InstrEmitter::CountResults(N);
while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
--NumUsed; // Skip over unused values at the end.
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
- SU->hasPhysRegDefs = true;
+ SU.hasPhysRegDefs = true;
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
@@ -477,7 +476,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (isPassiveNode(OpN)) continue; // Not scheduled.
SUnit *OpSU = &SUnits[OpN->getNodeId()];
assert(OpSU && "Node has no SUnit!");
- if (OpSU == SU) continue; // In the same group.
+ if (OpSU == &SU)
+ continue; // In the same group.
EVT OpVT = N->getOperand(i).getValueType();
assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
@@ -508,10 +508,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
computeOperandLatency(OpN, N, i, Dep);
- ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep);
+ ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep);
}
- if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ if (!SU.addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
// Multiple register uses are combined in the same SUnit. For example,
// we could have a set of glued nodes with all their defs consumed by
// another set of glued nodes. Register pressure tracking sees this as
@@ -721,10 +721,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const {
///
void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
- unsigned Noops = 0;
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
+ unsigned Noops = llvm::count(Sequence, nullptr);
assert(Sequence.size() - Noops == ScheduledNodes &&
"The number of nodes scheduled doesn't match the expected number!");
}
@@ -911,8 +908,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
}
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- SUnit *SU = Sequence[i];
+ for (SUnit *SU : Sequence) {
if (!SU) {
// Null SUnit* is a noop.
TII->insertNoop(*Emitter.getBlock(), InsertPos);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 540a6e3efbe1..10940478010e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -169,11 +169,11 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
releaseSuccessors(&EntrySU);
// All leaves to AvailableQueue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ for (SUnit &SU : SUnits) {
// It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
- AvailableQueue->push(&SUnits[i]);
- SUnits[i].isAvailable = true;
+ if (SU.Preds.empty()) {
+ AvailableQueue->push(&SU);
+ SU.isAvailable = true;
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c282e03387dd..2ae0d4df7b77 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2499,7 +2499,8 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
/// sense to specify which elements are demanded or undefined, therefore
/// they are simply ignored.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
- APInt &UndefElts, unsigned Depth) {
+ APInt &UndefElts, unsigned Depth) const {
+ unsigned Opcode = V.getOpcode();
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
@@ -2511,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
// Deal with some common cases here that work for both fixed and scalable
// vector types.
- switch (V.getOpcode()) {
+ switch (Opcode) {
case ISD::SPLAT_VECTOR:
UndefElts = V.getOperand(0).isUndef()
? APInt::getAllOnes(DemandedElts.getBitWidth())
@@ -2537,7 +2538,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1);
- }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth);
+ break;
+}
// We don't support other cases than those above for scalable vectors at
// the moment.
@@ -2548,7 +2554,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
UndefElts = APInt::getZero(NumElts);
- switch (V.getOpcode()) {
+ switch (Opcode) {
case ISD::BUILD_VECTOR: {
SDValue Scl;
for (unsigned i = 0; i != NumElts; ++i) {
@@ -2600,13 +2606,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
break;
}
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ // Widen the demanded elts by the src element count.
+ SDValue Src = V.getOperand(0);
+ // We don't support scalable vectors at the moment.
+ if (Src.getValueType().isScalableVector())
+ return false;
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt UndefSrcElts;
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
+ UndefElts = UndefSrcElts.truncOrSelf(NumElts);
+ return true;
+ }
+ break;
+ }
}
return false;
}
/// Helper wrapper to main isSplatValue function.
-bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const {
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
@@ -5291,9 +5314,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
- // Handle the case of two scalars.
+ // Handle binops special cases.
if (NumOps == 2) {
- // TODO: Move foldConstantFPMath here?
+ if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1]))
+ return CFP;
if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
@@ -5463,10 +5487,11 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
// should. That will require dealing with a potentially non-default
// rounding mode, checking the "opStatus" return value from the APFloat
// math calculations, and possibly other variations.
- auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
- auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
+ ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
if (N1CFP && N2CFP) {
- APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
+ APFloat C1 = N1CFP->getValueAPF(); // make copy
+ const APFloat &C2 = N2CFP->getValueAPF();
switch (Opcode) {
case ISD::FADD:
C1.add(C2, APFloat::rmNearestTiesToEven);
@@ -5486,6 +5511,14 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
case ISD::FCOPYSIGN:
C1.copySign(C2);
return getConstantFP(C1, DL, VT);
+ case ISD::FMINNUM:
+ return getConstantFP(minnum(C1, C2), DL, VT);
+ case ISD::FMAXNUM:
+ return getConstantFP(maxnum(C1, C2), DL, VT);
+ case ISD::FMINIMUM:
+ return getConstantFP(minimum(C1, C2), DL, VT);
+ case ISD::FMAXIMUM:
+ return getConstantFP(maximum(C1, C2), DL, VT);
default: break;
}
}
@@ -5502,8 +5535,9 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
switch (Opcode) {
case ISD::FSUB:
// -0.0 - undef --> undef (consistent with "fneg undef")
- if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef())
- return getUNDEF(VT);
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true))
+ if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef())
+ return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::FADD:
@@ -5962,9 +5996,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
return SV;
- if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
- return V;
-
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7726a0007e44..63cd723cf6da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1036,7 +1036,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
AA = aa;
GFI = gfi;
LibInfo = li;
- DL = &DAG.getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
@@ -1626,6 +1625,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
return getValue(Equiv->getGlobalValue());
+ if (const auto *NC = dyn_cast<NoCFIValue>(C))
+ return getValue(NC->getGlobalValue());
+
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
@@ -1921,8 +1923,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
- SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- DemoteReg, PtrValueVTs[0]);
+ SDValue RetPtr =
+ DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs, MemVTs;
@@ -2657,7 +2659,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
- Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+ Align Align =
+ DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
@@ -3058,14 +3061,14 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
- for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
- if (SL->JTCases[i].first.HeaderBB == First)
- SL->JTCases[i].first.HeaderBB = Last;
+ for (JumpTableBlock &JTB : SL->JTCases)
+ if (JTB.first.HeaderBB == First)
+ JTB.first.HeaderBB = Last;
// Update BitTestCases.
- for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
- if (SL->BitTestCases[i].Parent == First)
- SL->BitTestCases[i].Parent = Last;
+ for (BitTestBlock &BTB : SL->BitTestCases)
+ if (BTB.Parent == First)
+ BTB.Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
@@ -3111,6 +3114,8 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
SDValue Op = getValue(I.getOperand(0));
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
@@ -3881,7 +3886,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
+ uint64_t Offset =
+ DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3898,7 +3904,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
- TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ TypeSize ElementSize =
+ DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
@@ -4788,7 +4795,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
// Use TargetConstant instead of a regular constant for immarg.
- EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
@@ -6571,7 +6578,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
- Align Align = DL->getPrefTypeAlign(Global->getType());
+ Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
@@ -7127,12 +7134,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
- ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
- SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0),
+ ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
+ SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction,
VectorTripCount, ISD::CondCode::SETULT);
- setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT,
- DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT),
- SetCC));
+ setValue(&I, SetCC);
return;
}
case Intrinsic::experimental_vector_insert: {
@@ -7317,32 +7322,26 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
SmallVector<SDValue, 7> &OpValues,
- bool isGather) {
+ bool IsGather) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
- MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
bool AddToChain = true;
- if (!isGather) {
+ if (!IsGather) {
// Do not serialize variable-length loads of constant memory with
// anything.
- MemoryLocation ML;
- if (VT.isScalableVector())
- ML = MemoryLocation::getAfter(PtrOperand);
- else
- ML = MemoryLocation(
- PtrOperand,
- LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())),
- AAInfo);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
} else {
@@ -7380,18 +7379,20 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SmallVector<SDValue, 7> &OpValues,
- bool isScatter) {
+ bool IsScatter) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
- MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
SDValue ST;
- if (!isScatter) {
+ if (!IsScatter) {
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
ST =
DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
@@ -7690,8 +7691,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
- if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
- const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
+ if (const Constant *LoadCst =
+ ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+ LoadTy, Builder.DAG.getDataLayout()))
return Builder.getValue(LoadCst);
}
@@ -9646,8 +9648,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// We push in swifterror return as the last element of CLI.Ins.
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- if (Args[i].IsSwiftError) {
+ for (const ArgListEntry &Arg : Args) {
+ if (Arg.IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index d6122aa0a739..ea48042a5dcf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -190,7 +190,6 @@ public:
static const unsigned LowestSDNodeOrder = 1;
SelectionDAG &DAG;
- const DataLayout *DL = nullptr;
AAResults *AA = nullptr;
const TargetLibraryInfo *LibInfo;
@@ -568,9 +567,9 @@ private:
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues, bool isGather);
+ SmallVector<SDValue, 7> &OpValues, bool IsGather);
void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues, bool isScatter);
+ SmallVector<SDValue, 7> &OpValues, bool IsScatter);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c7e37cf8ca14..77e11b364588 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -297,7 +297,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
- "TargetLowering::EmitInstrWithCustomInserter!";
+ "TargetLowering::EmitInstrWithCustomInserter!\n";
#endif
llvm_unreachable(nullptr);
}
@@ -1784,27 +1784,25 @@ SelectionDAGISel::FinishBasicBlock() {
}
// Update PHI Nodes
- for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
- pi != pe; ++pi) {
- MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ for (const std::pair<MachineInstr *, unsigned> &P :
+ FuncInfo->PHINodesToUpdate) {
+ MachineInstrBuilder PHI(*MF, P.first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// This is "default" BB. We have two jumps to it. From "header" BB and
// from last "case" BB, unless the latter was skipped.
if (PHIBB == BTB.Default) {
- PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent);
+ PHI.addReg(P.second).addMBB(BTB.Parent);
if (!BTB.ContiguousRange) {
- PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
- .addMBB(BTB.Cases.back().ThisBB);
+ PHI.addReg(P.second).addMBB(BTB.Cases.back().ThisBB);
}
}
// One of "cases" BB.
- for (unsigned j = 0, ej = BTB.Cases.size();
- j != ej; ++j) {
- MachineBasicBlock* cBB = BTB.Cases[j].ThisBB;
+ for (const SwitchCG::BitTestCase &BT : BTB.Cases) {
+ MachineBasicBlock* cBB = BT.ThisBB;
if (cBB->isSuccessor(PHIBB))
- PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
+ PHI.addReg(P.second).addMBB(cBB);
}
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 737695b5eabe..e6b06ab93d6b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3136,6 +3136,19 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return false;
}
+bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isSplatValue if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
@@ -4853,13 +4866,9 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
}
// Now select chosen alternative in each constraint.
- for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
- if (cInfo.Type == InlineAsm::isClobber)
- continue;
- cInfo.selectAlternative(bestMAIndex);
- }
+ for (AsmOperandInfo &cInfo : ConstraintOperands)
+ if (cInfo.Type != InlineAsm::isClobber)
+ cInfo.selectAlternative(bestMAIndex);
}
}
@@ -4927,9 +4936,9 @@ TargetLowering::ConstraintWeight
ConstraintWeight BestWeight = CW_Invalid;
// Loop over the options, keeping track of the most general one.
- for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ for (const std::string &rCode : *rCodes) {
ConstraintWeight weight =
- getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ getSingleConstraintMatchWeight(info, rCode.c_str());
if (weight > BestWeight)
BestWeight = weight;
}
@@ -6550,15 +6559,15 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
true);
}
-bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandFunnelShift(SDNode *Node,
+ SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
- return false;
+ return SDValue();
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
@@ -6592,8 +6601,7 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
}
Z = DAG.getNOT(DL, Z, ShVT);
}
- Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
- return true;
+ return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
}
SDValue ShX, ShY;
@@ -6633,13 +6641,12 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
}
}
- Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
- return true;
+ return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
// TODO: Merge with expandFunnelShift.
-bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
- SDValue &Result, SelectionDAG &DAG) const {
+SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
+ SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool IsLeft = Node->getOpcode() == ISD::ROTL;
@@ -6650,12 +6657,12 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
EVT ShVT = Op1.getValueType();
SDValue Zero = DAG.getConstant(0, DL, ShVT);
- // If a rotate in the other direction is supported, use it.
+ // If a rotate in the other direction is better supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
+ if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
+ isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
- Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
- return true;
+ return DAG.getNode(RevRot, DL, VT, Op0, Sub);
}
if (!AllowVectorOps && VT.isVector() &&
@@ -6664,7 +6671,7 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
- return false;
+ return SDValue();
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
@@ -6690,8 +6697,7 @@ bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
HsVal =
DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
}
- Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
- return true;
+ return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
@@ -8048,7 +8054,8 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
- SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 86b559fd6413..43a54ce33bf0 100644
--- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -162,8 +162,8 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
// doInitialization creates the generic version of this type.
std::vector<Type *> EltTys;
EltTys.push_back(StackEntryTy);
- for (size_t I = 0; I != Roots.size(); I++)
- EltTys.push_back(Roots[I].second->getAllocatedType());
+ for (const std::pair<CallInst *, AllocaInst *> &Root : Roots)
+ EltTys.push_back(Root.second->getAllocatedType());
return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
}
@@ -240,8 +240,8 @@ void ShadowStackGCLowering::CollectRoots(Function &F) {
SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
for (BasicBlock &BB : F)
- for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;)
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ for (Instruction &I : BB)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I))
if (Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::gcroot) {
std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
@@ -377,9 +377,9 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Delete the original allocas (which are no longer used) and the intrinsic
// calls (which are no longer valid). Doing this last avoids invalidating
// iterators.
- for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
- Roots[I].first->eraseFromParent();
- Roots[I].second->eraseFromParent();
+ for (std::pair<CallInst *, AllocaInst *> &Root : Roots) {
+ Root.first->eraseFromParent();
+ Root.second->eraseFromParent();
}
Roots.clear();
diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 5ccfacfc26dc..3640296adbca 100644
--- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -131,15 +131,15 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
bool HasStackMap = false;
// Reverse iterate over all instructions and add the current live register
// set to an instruction if we encounter a patchpoint instruction.
- for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
- if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
- addLiveOutSetToMI(MF, *I);
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ addLiveOutSetToMI(MF, MI);
HasChanged = true;
HasStackMap = true;
++NumStackMaps;
}
- LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << *I);
- LiveRegs.stepBackward(*I);
+ LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << MI);
+ LiveRegs.stepBackward(MI);
}
++NumBBsVisited;
if (!HasStackMap)
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 7445f77c955d..6765fd274686 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -162,7 +162,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
}
bool StackProtector::HasAddressTaken(const Instruction *AI,
- uint64_t AllocSize) {
+ TypeSize AllocSize) {
const DataLayout &DL = M->getDataLayout();
for (const User *U : AI->users()) {
const auto *I = cast<Instruction>(U);
@@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// the bounds of the allocated object.
Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
if (MemLoc.hasValue() && MemLoc->Size.hasValue() &&
- MemLoc->Size.getValue() > AllocSize)
+ !TypeSize::isKnownGE(AllocSize,
+ TypeSize::getFixed(MemLoc->Size.getValue())))
return true;
switch (I->getOpcode()) {
case Instruction::Store:
@@ -203,13 +204,19 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// would use it could also be out-of-bounds meaning stack protection is
// required.
const GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
- unsigned TypeSize = DL.getIndexTypeSizeInBits(I->getType());
- APInt Offset(TypeSize, 0);
- APInt MaxOffset(TypeSize, AllocSize);
- if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.ugt(MaxOffset))
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(I->getType());
+ APInt Offset(IndexSize, 0);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return true;
+ TypeSize OffsetSize = TypeSize::Fixed(Offset.getLimitedValue());
+ if (!TypeSize::isKnownGT(AllocSize, OffsetSize))
return true;
// Adjust AllocSize to be the space remaining after this offset.
- if (HasAddressTaken(I, AllocSize - Offset.getLimitedValue()))
+ // We can't subtract a fixed size from a scalable one, so in that case
+ // assume the scalable value is of minimum size.
+ TypeSize NewAllocSize =
+ TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize;
+ if (HasAddressTaken(I, NewAllocSize))
return true;
break;
}
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index f49ba5ccd447..17e6f51d0899 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -325,8 +325,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n");
bool Changed = false;
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
- LiveInterval *li = SSIntervals[i];
+ for (LiveInterval *li : SSIntervals) {
int SS = Register::stackSlot2Index(li->reg());
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
@@ -338,8 +337,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
}
LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
- LiveInterval *li = SSIntervals[i];
+ for (LiveInterval *li : SSIntervals) {
int SS = Register::stackSlot2Index(li->reg());
li->setWeight(SlotWeights[SS]);
}
@@ -347,8 +345,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
llvm::stable_sort(SSIntervals, IntervalSorter());
#ifndef NDEBUG
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
- LLVM_DEBUG(SSIntervals[i]->dump());
+ for (LiveInterval *li : SSIntervals)
+ LLVM_DEBUG(li->dump());
LLVM_DEBUG(dbgs() << '\n');
#endif
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 54fc6ee45d00..68a7b80d6146 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -207,35 +207,34 @@ bool TailDuplicator::tailDuplicateAndUpdate(
// Add the new vregs as available values.
DenseMap<Register, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(VReg);
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
- Register SrcReg = LI->second[j].second;
+ for (std::pair<MachineBasicBlock *, Register> &J : LI->second) {
+ MachineBasicBlock *SrcBB = J.first;
+ Register SrcReg = J.second;
SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
}
+ SmallVector<MachineOperand *> DebugUses;
// Rewrite uses that are outside of the original def's block.
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
- // Only remove instructions after loop, as DBG_VALUE_LISTs with multiple
- // uses of VReg may invalidate the use iterator when erased.
- SmallPtrSet<MachineInstr *, 4> InstrsToRemove;
- while (UI != MRI->use_end()) {
- MachineOperand &UseMO = *UI;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(VReg))) {
MachineInstr *UseMI = UseMO.getParent();
- ++UI;
+ // Rewrite debug uses last so that they can take advantage of any
+ // register mappings introduced by other users in its BB, since we
+ // cannot create new register definitions specifically for the debug
+ // instruction (as debug instructions should not affect CodeGen).
if (UseMI->isDebugValue()) {
- // SSAUpdate can replace the use with an undef. That creates
- // a debug instruction that is a kill.
- // FIXME: Should it SSAUpdate job to delete debug instructions
- // instead of replacing the use with undef?
- InstrsToRemove.insert(UseMI);
+ DebugUses.push_back(&UseMO);
continue;
}
if (UseMI->getParent() == DefBB && !UseMI->isPHI())
continue;
SSAUpdate.RewriteUse(UseMO);
}
- for (auto *MI : InstrsToRemove)
- MI->eraseFromParent();
+ for (auto *UseMO : DebugUses) {
+ MachineInstr *UseMI = UseMO->getParent();
+ UseMO->setReg(
+ SSAUpdate.GetValueInMiddleOfBlock(UseMI->getParent(), true));
+ }
}
SSAUpdateVRs.clear();
@@ -511,8 +510,8 @@ void TailDuplicator::updateSuccessorsPHIs(
SSAUpdateVals.find(Reg);
if (LI != SSAUpdateVals.end()) {
// This register is defined in the tail block.
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
+ for (const std::pair<MachineBasicBlock *, Register> &J : LI->second) {
+ MachineBasicBlock *SrcBB = J.first;
// If we didn't duplicate a bb into a particular predecessor, we
// might still have added an entry to SSAUpdateVals to correcly
// recompute SSA. If that case, avoid adding a dummy extra argument
@@ -520,7 +519,7 @@ void TailDuplicator::updateSuccessorsPHIs(
if (!SrcBB->isSuccessor(SuccBB))
continue;
- Register SrcReg = LI->second[j].second;
+ Register SrcReg = J.second;
if (Idx != 0) {
MI.getOperand(Idx).setReg(SrcReg);
MI.getOperand(Idx + 1).setMBB(SrcBB);
@@ -531,8 +530,7 @@ void TailDuplicator::updateSuccessorsPHIs(
}
} else {
// Live in tail block, must also be live in predecessors.
- for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = TDBBs[j];
+ for (MachineBasicBlock *SrcBB : TDBBs) {
if (Idx != 0) {
MI.getOperand(Idx).setReg(Reg);
MI.getOperand(Idx + 1).setMBB(SrcBB);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 5119dac36713..3f22cc4289f2 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -436,7 +436,7 @@ MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const {
assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
MachineFunction &MF = *MBB.getParent();
- return MF.CloneMachineInstrBundle(MBB, InsertBefore, Orig);
+ return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig);
}
// If the COPY instruction in MI can be folded to a stack operation, return
@@ -1418,3 +1418,16 @@ void TargetInstrInfo::mergeOutliningCandidateAttributes(
}))
F.addFnAttr(Attribute::NoUnwind);
}
+
+bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+ // Some instrumentations create a special TargetOpcode at the start which
+ // expands to special code sequences which must be present.
+ auto First = MBB.getFirstNonDebugInstr();
+ if (First != MBB.end() &&
+ (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
+ First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER))
+ return false;
+
+ return true;
+}
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index c0a7efff9e98..6fc6881f8736 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1187,7 +1187,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// all stack slots), but we need to handle the different type of stackmap
// operands and memory effects here.
- if (!llvm::any_of(MI->operands(),
+ if (llvm::none_of(MI->operands(),
[](MachineOperand &Operand) { return Operand.isFI(); }))
return MBB;
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index f4bb71535f7f..f5cb518fce3e 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -248,8 +248,8 @@ static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
assert(RC->isAllocatable() && "invalid for nonallocatable sets");
ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF);
- for (unsigned i = 0; i != Order.size(); ++i)
- R.set(Order[i]);
+ for (MCPhysReg PR : Order)
+ R.set(PR);
}
BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index c9a19948ff2f..3426a03b6083 100644
--- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -144,23 +144,22 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
}
// Actually remove the blocks now.
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ for (MachineBasicBlock *BB : DeadBlocks) {
// Remove any call site information for calls in the block.
- for (auto &I : DeadBlocks[i]->instrs())
+ for (auto &I : BB->instrs())
if (I.shouldUpdateCallSiteInfo())
- DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I);
+ BB->getParent()->eraseCallSiteInfo(&I);
- DeadBlocks[i]->eraseFromParent();
+ BB->eraseFromParent();
}
// Cleanup PHI nodes.
- for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- MachineBasicBlock *BB = &*I;
+ for (MachineBasicBlock &BB : F) {
// Prune unneeded PHI entries.
- SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
- BB->pred_end());
- MachineBasicBlock::iterator phi = BB->begin();
- while (phi != BB->end() && phi->isPHI()) {
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB.pred_begin(),
+ BB.pred_end());
+ MachineBasicBlock::iterator phi = BB.begin();
+ while (phi != BB.end() && phi->isPHI()) {
for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
if (!preds.count(phi->getOperand(i).getMBB())) {
phi->RemoveOperand(i);
@@ -189,7 +188,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// insert a COPY instead of simply replacing the output
// with the input.
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
- BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
+ BuildMI(BB, BB.getFirstNonPHI(), phi->getDebugLoc(),
TII->get(TargetOpcode::COPY), OutputReg)
.addReg(InputReg, getRegState(Input), InputSub);
}
diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
new file mode 100644
index 000000000000..cbc5d9ec169b
--- /dev/null
+++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -0,0 +1,1009 @@
+//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/VLIWMachineScheduler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iomanip>
+#include <limits>
+#include <memory>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false));
+
+static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
+ cl::Hidden, cl::ZeroOrMore,
+ cl::init(1));
+
+// Check if the scheduler should penalize instructions that are available too
+// early due to a zero-latency dependence.
+static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true));
+
+// This value is used to determine if a register class is a high pressure set.
+// We compute the maximum number of registers needed and divide it by the total
+// available. Then, we compare the result to this value.
+static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
+ cl::init(0.75f),
+ cl::desc("High register pressure threhold."));
+
+VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
+ const TargetSchedModel *SM)
+ : TII(STI.getInstrInfo()), SchedModel(SM) {
+ ResourcesModel = createPacketizer(STI);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ Packet.reserve(SchedModel->getIssueWidth());
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+void VLIWResourceModel::reset() {
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
+
+/// Return true if there is a dependence between SUd and SUu.
+bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
+ if (SUd->Succs.size() == 0)
+ return false;
+
+ for (const auto &S : SUd->Succs) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (S.isCtrl())
+ continue;
+
+ if (S.getSUnit() == SUu && S.getLatency() > 0)
+ return true;
+ }
+ return false;
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (stateful), it is more like
+/// another heuristic. Many corner cases are figured
+/// empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
+ if (!SU || !SU->getInstr())
+ return false;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(*SU->getInstr()))
+ return false;
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+
+ // Now see if there are no other dependencies to instructions already
+ // in the packet.
+ if (IsTop) {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(Packet[i], SU))
+ return false;
+ } else {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(SU, Packet[i]))
+ return false;
+ }
+ return true;
+}
+
+/// Keep track of available resources.
+bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
+ bool startNewCycle = false;
+ // Artificially reset state.
+ if (!SU) {
+ reset();
+ TotalPackets++;
+ return false;
+ }
+ // If this SU does not fit in the packet or the packet is now full
+ // start a new one.
+ if (!isResourceAvailable(SU, IsTop) ||
+ Packet.size() >= SchedModel->getIssueWidth()) {
+ reset();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ ResourcesModel->reserveResources(*SU->getInstr());
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+ Packet.push_back(SU);
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
+ LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ LLVM_DEBUG(Packet[i]->getInstr()->dump());
+ }
+#endif
+
+ return startNewCycle;
+}
+
+DFAPacketizer *
+VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
+ return STI.getInstrInfo()->CreateTargetScheduleState(STI);
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+void VLIWMachineScheduler::schedule() {
+ LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
+ << printMBBReference(*BB) << " " << BB->getName()
+ << " in_func " << BB->getParent()->getName()
+ << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
+
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ // Postprocess the DAG to add platform-specific artificial dependencies.
+ postprocessDAG();
+
+ SmallVector<SUnit *, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
+ LLVM_DEBUG({
+ unsigned maxH = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getHeight() > maxH)
+ maxH = SU.getHeight();
+ dbgs() << "Max Height " << maxH << "\n";
+ });
+ LLVM_DEBUG({
+ unsigned maxD = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getDepth() > maxD)
+ maxD = SU.getDepth();
+ dbgs() << "Max Depth " << maxD << "\n";
+ });
+ LLVM_DEBUG(dump());
+ if (ViewMISchedDAGs)
+ viewGraph();
+
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ LLVM_DEBUG(
+ dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU)
+ break;
+
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = static_cast<VLIWMachineScheduler *>(dag);
+ SchedModel = DAG->getSchedModel();
+
+ Top.init(DAG, SchedModel);
+ Bot.init(DAG, SchedModel);
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ delete Top.ResourceModel;
+ delete Bot.ResourceModel;
+ Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+ Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+
+ const std::vector<unsigned> &MaxPressure =
+ DAG->getRegPressure().MaxSetPressure;
+ HighPressureSets.assign(MaxPressure.size(), 0);
+ for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
+ unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
+ HighPressureSets[i] =
+ ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
+ }
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
+ const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
+ return new VLIWResourceModel(STI, SchedModel);
+}
+
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+ for (const SDep &PI : SU->Preds) {
+ unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
+ unsigned MinLatency = PI.getLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
+ ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
+ delete ResourceModel;
+ delete HazardRec;
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if (IssueCount + uops > SchedModel->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
+ SUnit *SU, unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+
+ Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
+ unsigned Width = SchedModel->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+
+ LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
+ bool startNewCycle = false;
+
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+
+ // Update DFA model.
+ startNewCycle = ResourceModel->reserveResources(SU, isTop());
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+ if (startNewCycle) {
+ LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ } else
+ LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
+ << CurrCycle << '\n');
+}
+
+/// Release pending ready nodes in to the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin() + i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin() + i);
+ --i;
+ --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ auto AdvanceCycle = [this]() {
+ if (Available.empty())
+ return true;
+ if (Available.size() == 1 && Pending.size() > 0)
+ return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
+ getWeakLeft(*Available.begin(), isTop()) != 0;
+ return false;
+ };
+ for (unsigned i = 0; AdvanceCycle(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard");
+ (void)i;
+ ResourceModel->reserveResources(nullptr, isTop());
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return nullptr;
+}
+
+#ifndef NDEBUG
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+ const ReadyQueue &Q, SUnit *SU,
+ int Cost, PressureChange P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
+ << P.getUnitInc() << " ";
+ else
+ dbgs() << " ";
+ dbgs() << "cost(" << Cost << ")\t";
+ DAG->dumpNode(*SU);
+}
+
+// Very detailed queue dump, to be used with higher verbosity levels.
+void ConvergingVLIWScheduler::readyQueueVerboseDump(
+ const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
+ ReadyQueue &Q) {
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ dbgs() << ">>> " << Q.getName() << "\n";
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ std::stringstream dbgstr;
+ dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
+ dbgs() << dbgstr.str();
+ SchedulingCost(Q, *I, Candidate, RPDelta, true);
+ dbgs() << "\t";
+ (*I)->getInstr()->dump();
+ }
+ dbgs() << "\n";
+}
+#endif
+
+/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
+ if (SU->NumPredsLeft == 0)
+ return false;
+
+ for (auto &Pred : SU->Preds) {
+ // We found an available, but not scheduled, predecessor.
+ if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
+ if (SU->NumSuccsLeft == 0)
+ return false;
+
+ for (auto &Succ : SU->Succs) {
+ // We found an available, but not scheduled, successor.
+ if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
+ return false;
+ }
+ return true;
+}
+
+/// Check if the instruction changes the register pressure of a register in the
+/// high pressure set. The function returns a negative value if the pressure
+/// decreases and a positive value if the pressure increases. If the instruction
+/// doesn't use a high pressure register or doesn't change the register
+/// pressure, then return 0.
+int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
+ PressureDiff &PD = DAG->getPressureDiff(SU);
+ for (auto &P : PD) {
+ if (!P.isValid())
+ continue;
+ // The pressure differences are computed bottom-up, so the comparison for
+ // an increase is positive in the bottom direction, but negative in the
+ // top-down direction.
+ if (HighPressureSets[P.getPSet()])
+ return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
+ }
+ return 0;
+}
+
+/// Single point to compute overall scheduling cost.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (!SU || SU->isScheduled)
+ return ResCount;
+
+ LLVM_DEBUG(if (verbose) dbgs()
+ << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
+ // Forced priority is high.
+ if (SU->isScheduleHigh) {
+ ResCount += PriorityOne;
+ LLVM_DEBUG(dbgs() << "H|");
+ }
+
+ unsigned IsAvailableAmt = 0;
+ // Critical path first.
+ if (Q.getID() == TopQID) {
+ if (Top.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getHeight() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Top.ResourceModel->isResourceAvailable(SU, true)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ } else {
+ if (Bot.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getDepth() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ }
+
+ unsigned NumNodesBlocking = 0;
+ if (Q.getID() == TopQID) {
+ // How many SUs does it block from scheduling?
+ // Look at all of the successors of this node.
+ // Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ if (Top.isLatencyBound(SU))
+ for (const SDep &SI : SU->Succs)
+ if (isSingleUnscheduledPred(SI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ } else {
+ // How many unscheduled predecessors block this node?
+ if (Bot.isLatencyBound(SU))
+ for (const SDep &PI : SU->Preds)
+ if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ }
+ ResCount += (NumNodesBlocking * ScaleTwo);
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
+ dbgs() << dbgstr.str();
+ });
+
+ // Factor in reg pressure as a heuristic.
+ if (!IgnoreBBRegPressure) {
+ // Decrease priority by the amount that register pressure exceeds the limit.
+ ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
+ // Decrease priority if register pressure exceeds the limit.
+ ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
+ // Decrease priority slightly if register pressure would increase over the
+ // current maximum.
+ ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
+ // If there are register pressure issues, then we remove the value added for
+ // the instruction being available. The rationale is that we really don't
+ // want to schedule an instruction that causes a spill.
+ if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
+ (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
+ Delta.CurrentMax.getUnitInc()))
+ ResCount -= IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) {
+ dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
+ << Delta.CriticalMax.getUnitInc() << "/"
+ << Delta.CurrentMax.getUnitInc() << ")|";
+ });
+ }
+
+ // Give preference to a zero latency instruction if the dependent
+ // instruction is in the current packet.
+ if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
+ for (const SDep &PI : SU->Preds) {
+ if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
+ PI.getLatency() == 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
+ for (const SDep &SI : SU->Succs) {
+ if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
+ SI.getLatency() == 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ }
+
+ // If the instruction has a non-zero latency dependence with an instruction in
+ // the current packet, then it should not be scheduled yet. The case occurs
+ // when the dependent instruction is scheduled in a new packet, so the
+ // scheduler updates the current cycle and pending instructions become
+ // available.
+ if (CheckEarlyAvail) {
+ if (Q.getID() == TopQID) {
+ for (const auto &PI : SU->Preds) {
+ if (PI.getLatency() > 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ } else {
+ for (const auto &SI : SU->Succs) {
+ if (SI.getLatency() > 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "Total " << std::setw(4) << ResCount << ")";
+ dbgs() << dbgstr.str();
+ });
+
+ return ResCount;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult
+ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ ReadyQueue &Q = Zone.Available;
+ LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
+ readyQueueVerboseDump(RPTracker, Candidate, Q);
+ else Q.dump(););
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ // BestSU remains NULL if no top candidates beat the best existing candidate.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+
+ // Choose node order for negative cost candidates. There is no good
+ // candidate in this case.
+ if (CurrentCost < 0 && Candidate.SCost < 0) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ }
+ continue;
+ }
+
+ // Best cost.
+ if (CurrentCost > Candidate.SCost) {
+ LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+
+ // Choose an instruction that does not depend on an artificial edge.
+ unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
+ unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
+ if (CurrWeak != CandWeak) {
+ if (CurrWeak < CandWeak) {
+ LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = Weak;
+ }
+ continue;
+ }
+
+ if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
+ unsigned CurrSize, CandSize;
+ if (Q.getID() == TopQID) {
+ CurrSize = (*I)->Succs.size();
+ CandSize = Candidate.SU->Succs.size();
+ } else {
+ CurrSize = (*I)->Preds.size();
+ CandSize = Candidate.SU->Preds.size();
+ }
+ if (CurrSize > CandSize) {
+ LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ }
+ // Keep the old candidate if it's a better candidate. That is, don't use
+ // the subsequent tie breaker.
+ if (CurrSize != CandSize)
+ continue;
+ }
+
+ // Tie breaker.
+ // To avoid scheduling indeterminism, we need a tie breaker
+ // for the case when cost is identical for two nodes.
+ if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+ }
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Top\n");
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+ LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+ LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ if (TopCand.SCost > BotCand.SCost) {
+ LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+ LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
+ IsTopNode = false;
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
+ << reportPackets() << ")\n";
+ DAG->dumpNode(*SU));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ Top.bumpNode(SU);
+ SU->TopReadyCycle = Top.CurrCycle;
+ } else {
+ Bot.bumpNode(SU);
+ SU->BotReadyCycle = Bot.CurrCycle;
+ }
+}
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 4876b9e23717..0c42bef82005 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -201,9 +201,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::i64x8: return IntegerType::get(Context, 512);
case MVT::externref:
+ // pointer to opaque struct in addrspace(10)
return PointerType::get(StructType::create(Context), 10);
case MVT::funcref:
- return PointerType::get(StructType::create(Context), 20);
+ // pointer to i8 addrspace(20)
+ return PointerType::get(Type::getInt8Ty(Context), 20);
case MVT::v1i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1:
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index 4564aa1c1278..d31183e46d65 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -573,9 +573,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
int CatchState = -1, FollowerState = -1;
SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
- for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend();
- CBI != CBE; ++CBI, FollowerState = CatchState) {
- const BasicBlock *CatchBlock = *CBI;
+ for (const BasicBlock *CatchBlock : llvm::reverse(CatchBlocks)) {
// Create the entry for this catch with the appropriate handler
// properties.
const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
@@ -591,6 +589,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
Worklist.emplace_back(I, CatchState);
// Remember this catch's state.
FuncInfo.EHPadStateMap[Catch] = CatchState;
+ FollowerState = CatchState;
}
// Associate the catchswitch with the state of its first catch.
assert(CatchSwitch->getNumHandlers());
@@ -601,11 +600,9 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
// Step two: record the TryParentState of each state. For cleanuppads that
// don't have cleanuprets, we may need to infer this from their child pads,
// so visit pads in descendant-most to ancestor-most order.
- for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(),
- End = FuncInfo.ClrEHUnwindMap.rend();
- Entry != End; ++Entry) {
+ for (ClrEHUnwindMapEntry &Entry : llvm::reverse(FuncInfo.ClrEHUnwindMap)) {
const Instruction *Pad =
- Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI();
+ Entry.Handler.get<const BasicBlock *>()->getFirstNonPHI();
// For most pads, the TryParentState is the state associated with the
// unwind dest of exceptional exits from it.
const BasicBlock *UnwindDest;
@@ -615,7 +612,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
// that's not the unwind dest of exceptions escaping the catch. Those
// cases were already assigned a TryParentState in the first pass, so
// skip them.
- if (Entry->TryParentState != -1)
+ if (Entry.TryParentState != -1)
continue;
// Otherwise, get the unwind dest from the catchswitch.
UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
@@ -692,7 +689,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
}
- Entry->TryParentState = UnwindDestState;
+ Entry.TryParentState = UnwindDestState;
}
// Step three: transfer information from pads to invokes.
diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 11d1b309aa64..b66429d8a5bf 100644
--- a/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -226,6 +226,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::arm:
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
+ case Triple::ArchType::hexagon:
case Triple::ArchType::mips:
case Triple::ArchType::mipsel:
case Triple::ArchType::mips64:
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index a3dec6c25e44..ae0859e1ecfd 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -223,22 +223,21 @@ static void analyzeImportedModule(
SysRoot = CU.getSysRoot();
if (!SysRoot.empty() && Path.startswith(SysRoot))
return;
- if (Optional<DWARFFormValue> Val = DIE.find(dwarf::DW_AT_name))
- if (Optional<const char *> Name = Val->getAsCString()) {
- auto &Entry = (*ParseableSwiftInterfaces)[*Name];
- // The prepend path is applied later when copying.
- DWARFDie CUDie = CU.getOrigUnit().getUnitDIE();
- SmallString<128> ResolvedPath;
- if (sys::path::is_relative(Path))
- resolveRelativeObjectPath(ResolvedPath, CUDie);
- sys::path::append(ResolvedPath, Path);
- if (!Entry.empty() && Entry != ResolvedPath)
- ReportWarning(
- Twine("Conflicting parseable interfaces for Swift Module ") +
- *Name + ": " + Entry + " and " + Path,
- DIE);
- Entry = std::string(ResolvedPath.str());
- }
+ Optional<const char*> Name = dwarf::toString(DIE.find(dwarf::DW_AT_name));
+ if (!Name)
+ return;
+ auto &Entry = (*ParseableSwiftInterfaces)[*Name];
+ // The prepend path is applied later when copying.
+ DWARFDie CUDie = CU.getOrigUnit().getUnitDIE();
+ SmallString<128> ResolvedPath;
+ if (sys::path::is_relative(Path))
+ resolveRelativeObjectPath(ResolvedPath, CUDie);
+ sys::path::append(ResolvedPath, Path);
+ if (!Entry.empty() && Entry != ResolvedPath)
+ ReportWarning(Twine("Conflicting parseable interfaces for Swift Module ") +
+ *Name + ": " + Entry + " and " + Path,
+ DIE);
+ Entry = std::string(ResolvedPath.str());
}
/// The distinct types of work performed by the work loop in
@@ -409,10 +408,10 @@ static bool dieNeedsChildrenToBeMeaningful(uint32_t Tag) {
void DWARFLinker::cleanupAuxiliarryData(LinkContext &Context) {
Context.clear();
- for (auto I = DIEBlocks.begin(), E = DIEBlocks.end(); I != E; ++I)
- (*I)->~DIEBlock();
- for (auto I = DIELocs.begin(), E = DIELocs.end(); I != E; ++I)
- (*I)->~DIELoc();
+ for (DIEBlock *I : DIEBlocks)
+ I->~DIEBlock();
+ for (DIELoc *I : DIELocs)
+ I->~DIELoc();
DIEBlocks.clear();
DIELocs.clear();
@@ -846,7 +845,7 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) {
unsigned DWARFLinker::DIECloner::cloneStringAttribute(
DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val,
const DWARFUnit &U, OffsetsStringPool &StringPool, AttributesInfo &Info) {
- Optional<const char *> String = Val.getAsCString();
+ Optional<const char *> String = dwarf::toString(Val);
if (!String)
return 0;
@@ -1423,6 +1422,11 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
Flags |= TF_InFunctionScope;
if (!Info.InDebugMap && LLVM_LIKELY(!Update))
Flags |= TF_SkipPC;
+ } else if (Abbrev->getTag() == dwarf::DW_TAG_variable) {
+ // Function-local globals could be in the debug map even when the function
+ // is not, e.g., inlined functions.
+ if ((Flags & TF_InFunctionScope) && Info.InDebugMap)
+ Flags &= ~TF_SkipPC;
}
for (const auto &AttrSpec : Abbrev->attributes()) {
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 46e7457f2368..1ab6ead3b5f6 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -531,9 +531,7 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
unsigned RowsSinceLastSequence = 0;
- for (unsigned Idx = 0; Idx < Rows.size(); ++Idx) {
- auto &Row = Rows[Idx];
-
+ for (DWARFDebugLine::Row &Row : Rows) {
int64_t AddressDelta;
if (Address == -1ULL) {
MS->emitIntValue(dwarf::DW_LNS_extended_op, 1);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index c8331487f282..95135c95e8d2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1195,7 +1195,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram,
Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
Die = Origin;
if (auto NameAttr = Die.find(DW_AT_name))
- if (Optional<const char *> Name = NameAttr->getAsCString())
+ if (Optional<const char *> Name = dwarf::toString(*NameAttr))
Local.Name = *Name;
if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type))
Local.Size = getTypeSize(Type, getCUAddrSize());
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index bda41b1f34e9..f36d3f87257a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1331,8 +1331,8 @@ Optional<StringRef> DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileInd
if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex))
return None;
const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex);
- if (Optional<const char *> source = Entry.Source.getAsCString())
- return StringRef(*source);
+ if (auto E = dwarf::toString(Entry.Source))
+ return StringRef(*E);
return None;
}
@@ -1350,10 +1350,10 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(
if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
return false;
const FileNameEntry &Entry = getFileNameEntry(FileIndex);
- Optional<const char *> Name = Entry.Name.getAsCString();
- if (!Name)
+ auto E = dwarf::toString(Entry.Name);
+ if (!E)
return false;
- StringRef FileName = *Name;
+ StringRef FileName = *E;
if (Kind == FileLineInfoKind::RawValue ||
isPathAbsoluteOnWindowsOrPosix(FileName)) {
Result = std::string(FileName);
@@ -1372,11 +1372,10 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(
// relative names.
if ((Entry.DirIdx != 0 || Kind != FileLineInfoKind::RelativeFilePath) &&
Entry.DirIdx < IncludeDirectories.size())
- IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
+ IncludeDir = dwarf::toStringRef(IncludeDirectories[Entry.DirIdx]);
} else {
if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size())
- IncludeDir =
- IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue();
+ IncludeDir = dwarf::toStringRef(IncludeDirectories[Entry.DirIdx - 1]);
}
// For absolute paths only, include the compilation directory of compile unit.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index cdffb36741c8..f39c7871d603 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -41,9 +41,7 @@ public:
} // namespace
static Error createResolverError(uint32_t Index, unsigned Kind) {
- return createStringError(errc::invalid_argument,
- "Unable to resolve indirect address %u for: %s",
- Index, dwarf::LocListEncodingString(Kind).data());
+ return make_error<ResolverError>(Index, (dwarf::LoclistEntries)Kind);
}
Expected<Optional<DWARFLocationExpression>>
@@ -404,3 +402,10 @@ void DWARFDebugLoclists::dumpRange(uint64_t StartOffset, uint64_t Size,
OS << '\n';
}
}
+
+void llvm::ResolverError::log(raw_ostream &OS) const {
+ OS << format("unable to resolve indirect address %u for: %s", Index,
+ dwarf::LocListEncodingString(Kind).data());
+}
+
+char llvm::ResolverError::ID;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index 80ffd81b3403..7a81d7ff064b 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -194,13 +194,11 @@ Error DWARFDebugMacro::parseImpl(
if (MacroContributionOffset == MacroToUnits.end())
return createStringError(errc::invalid_argument,
"Macro contribution of the unit not found");
- Optional<uint64_t> StrOffset =
+ Expected<uint64_t> StrOffset =
MacroContributionOffset->second->getStringOffsetSectionItem(
Data.getULEB128(&Offset));
if (!StrOffset)
- return createStringError(
- errc::invalid_argument,
- "String offsets contribution of the unit not found");
+ return StrOffset.takeError();
E.MacroStr =
MacroContributionOffset->second->getStringExtractor().getCStr(
&*StrOffset);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index ed50f2635738..5421b2d59a1b 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -89,7 +89,6 @@ static void dumpLocationList(raw_ostream &OS, const DWARFFormValue &FormValue,
U->getLocationTable().dumpLocationList(&Offset, OS, U->getBaseAddress(), MRI,
Ctx.getDWARFObj(), U, DumpOpts,
Indent);
- return;
}
static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue,
@@ -105,7 +104,6 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue,
Ctx.isLittleEndian(), 0);
DWARFExpression(Data, U->getAddressByteSize(), U->getFormParams().Format)
.print(OS, DumpOpts, MRI, U);
- return;
}
static DWARFDie resolveReferencedType(DWARFDie D,
@@ -672,6 +670,8 @@ struct DWARFTypePrinter {
return;
if (D.getTag() == DW_TAG_subprogram)
return;
+ if (D.getTag() == DW_TAG_lexical_block)
+ return;
D = D.resolveTypeUnitReference();
if (DWARFDie P = D.getParent())
appendScopes(P);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index d0fbd702e831..e19f5b8138fa 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -217,8 +217,8 @@ static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
if (DumpOpts.Verbose)
OS << format("0x%08" PRIx64 " -> ", Operands[Operand]);
OS << format("0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]);
- if (auto Name = Die.find(dwarf::DW_AT_name))
- OS << " \"" << Name->getAsCString() << "\"";
+ if (auto Name = dwarf::toString(Die.find(dwarf::DW_AT_name)))
+ OS << " \"" << *Name << "\"";
} else {
OS << format(" <invalid base_type ref: 0x%" PRIx64 ">",
Operands[Operand]);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index cea0f63bbf81..86991a3949dd 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -613,50 +613,53 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
}
void DWARFFormValue::dumpString(raw_ostream &OS) const {
- Optional<const char *> DbgStr = getAsCString();
- if (DbgStr.hasValue()) {
+ if (auto DbgStr = dwarf::toString(*this)) {
auto COS = WithColor(OS, HighlightColor::String);
COS.get() << '"';
- COS.get().write_escaped(DbgStr.getValue());
+ COS.get().write_escaped(*DbgStr);
COS.get() << '"';
}
}
-Optional<const char *> DWARFFormValue::getAsCString() const {
+Expected<const char *> DWARFFormValue::getAsCString() const {
if (!isFormClass(FC_String))
- return None;
+ return make_error<StringError>("Invalid form for string attribute",
+ inconvertibleErrorCode());
if (Form == DW_FORM_string)
return Value.cstr;
// FIXME: Add support for DW_FORM_GNU_strp_alt
if (Form == DW_FORM_GNU_strp_alt || C == nullptr)
- return None;
+ return make_error<StringError>("Unsupported form for string attribute",
+ inconvertibleErrorCode());
uint64_t Offset = Value.uval;
- if (Form == DW_FORM_line_strp) {
- // .debug_line_str is tracked in the Context.
- if (const char *Str = C->getLineStringExtractor().getCStr(&Offset))
- return Str;
- return None;
- }
+ Optional<uint32_t> Index;
if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx ||
Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 ||
Form == DW_FORM_strx4) {
if (!U)
- return None;
- Optional<uint64_t> StrOffset = U->getStringOffsetSectionItem(Offset);
+ return make_error<StringError>("API limitation - string extraction not "
+ "available without a DWARFUnit",
+ inconvertibleErrorCode());
+ Expected<uint64_t> StrOffset = U->getStringOffsetSectionItem(Offset);
+ Index = Offset;
if (!StrOffset)
- return None;
+ return StrOffset.takeError();
Offset = *StrOffset;
}
// Prefer the Unit's string extractor, because for .dwo it will point to
// .debug_str.dwo, while the Context's extractor always uses .debug_str.
- if (U) {
- if (const char *Str = U->getStringExtractor().getCStr(&Offset))
- return Str;
- return None;
- }
- if (const char *Str = C->getStringExtractor().getCStr(&Offset))
+ DataExtractor StrData = Form == DW_FORM_line_strp
+ ? C->getLineStringExtractor()
+ : U ? U->getStringExtractor()
+ : C->getStringExtractor();
+ if (const char *Str = StrData.getCStr(&Offset))
return Str;
- return None;
+ std::string Msg = FormEncodingString(Form).str();
+ if (Index)
+ Msg += (" uses index " + Twine(*Index) + ", but the referenced string").str();
+ Msg += (" offset " + Twine(Offset) + " is beyond .debug_str bounds").str();
+ return make_error<StringError>(Msg,
+ inconvertibleErrorCode());
}
Optional<uint64_t> DWARFFormValue::getAsAddress() const {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 82c34f537036..eed0a60ec75e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -214,13 +214,17 @@ DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
return {{Address, Section}};
}
-Optional<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const {
+Expected<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const {
if (!StringOffsetsTableContribution)
- return None;
+ return make_error<StringError>(
+ "DW_FORM_strx used without a valid string offsets table",
+ inconvertibleErrorCode());
unsigned ItemSize = getDwarfStringOffsetsByteSize();
uint64_t Offset = getStringOffsetsBase() + Index * ItemSize;
if (StringOffsetSection.Data.size() < Offset + ItemSize)
- return None;
+ return make_error<StringError>("DW_FORM_strx uses index " + Twine(Index) +
+ ", which is too large",
+ inconvertibleErrorCode());
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
isLittleEndian, 0);
return DA.getRelocatedValue(ItemSize, &Offset);
@@ -603,7 +607,7 @@ bool DWARFUnit::parseDWO() {
DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase);
if (getVersion() == 4) {
auto DWORangesBase = UnitDie.getRangesBaseAttribute();
- DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0);
+ DWO->setRangesSection(RangeSection, DWORangesBase.getValueOr(0));
}
return true;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 7673a721c4ea..6424c2f59844 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -390,6 +390,9 @@ bool DWARFVerifier::handleDebugInfo() {
OS << "Verifying non-dwo Units...\n";
NumErrors += verifyUnits(DCtx.getNormalUnitsVector());
+
+ OS << "Verifying dwo Units...\n";
+ NumErrors += verifyUnits(DCtx.getDWOUnitsVector());
return NumErrors == 0;
}
@@ -400,10 +403,13 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
if (!Die.isValid())
return NumErrors;
+ DWARFUnit *Unit = Die.getDwarfUnit();
+
auto RangesOrError = Die.getAddressRanges();
if (!RangesOrError) {
// FIXME: Report the error.
- ++NumErrors;
+ if (!Unit->isDWOUnit())
+ ++NumErrors;
llvm::consumeError(RangesOrError.takeError());
return NumErrors;
}
@@ -496,15 +502,18 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
};
const DWARFObject &DObj = DCtx.getDWARFObj();
+ DWARFUnit *U = Die.getDwarfUnit();
const auto Attr = AttrValue.Attr;
switch (Attr) {
case DW_AT_ranges:
// Make sure the offset in the DW_AT_ranges attribute is valid.
if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
- unsigned DwarfVersion = Die.getDwarfUnit()->getVersion();
+ unsigned DwarfVersion = U->getVersion();
const DWARFSection &RangeSection = DwarfVersion < 5
? DObj.getRangesSection()
: DObj.getRnglistsSection();
+ if (U->isDWOUnit() && RangeSection.Data.empty())
+ break;
if (*SectionOffset >= RangeSection.Data.size())
ReportError(
"DW_AT_ranges offset is beyond " +
@@ -517,7 +526,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
case DW_AT_stmt_list:
// Make sure the offset in the DW_AT_stmt_list attribute is valid.
if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
- if (*SectionOffset >= DObj.getLineSection().Data.size())
+ if (*SectionOffset >= U->getLineSection().Data.size())
ReportError("DW_AT_stmt_list offset is beyond .debug_line bounds: " +
llvm::formatv("{0:x8}", *SectionOffset));
break;
@@ -525,9 +534,18 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
ReportError("DIE has invalid DW_AT_stmt_list encoding:");
break;
case DW_AT_location: {
+ // FIXME: It might be nice if there's a way to walk location expressions
+ // without trying to resolve the address ranges - it'd be a more efficient
+ // API (since the API is currently unnecessarily resolving addresses for
+ // this use case which only wants to validate the expressions themselves) &
+ // then the expressions could be validated even if the addresses can't be
+ // resolved.
+ // That sort of API would probably look like a callback "for each
+ // expression" with some way to lazily resolve the address ranges when
+ // needed (& then the existing API used here could be built on top of that -
+ // using the callback API to build the data structure and return it).
if (Expected<std::vector<DWARFLocationExpression>> Loc =
Die.getLocations(DW_AT_location)) {
- DWARFUnit *U = Die.getDwarfUnit();
for (const auto &Entry : *Loc) {
DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), 0);
DWARFExpression Expression(Data, U->getAddressByteSize(),
@@ -539,8 +557,12 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
if (Error || !Expression.verify(U))
ReportError("DIE contains invalid DWARF expression:");
}
- } else
- ReportError(toString(Loc.takeError()));
+ } else if (Error Err = handleErrors(
+ Loc.takeError(), [&](std::unique_ptr<ResolverError> E) {
+ return U->isDWOUnit() ? Error::success()
+ : Error(std::move(E));
+ }))
+ ReportError(toString(std::move(Err)));
break;
}
case DW_AT_specification:
@@ -576,7 +598,8 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
case DW_AT_call_file:
case DW_AT_decl_file: {
if (auto FileIdx = AttrValue.Value.getAsUnsignedConstant()) {
- DWARFUnit *U = Die.getDwarfUnit();
+ if (U->isDWOUnit() && !U->isTypeUnit())
+ break;
const auto *LT = U->getContext().getLineTableForUnit(U);
if (LT) {
if (!LT->hasFileAtIndex(*FileIdx)) {
@@ -616,7 +639,6 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
DWARFAttribute &AttrValue,
ReferenceMap &LocalReferences,
ReferenceMap &CrossUnitReferences) {
- const DWARFObject &DObj = DCtx.getDWARFObj();
auto DieCU = Die.getDwarfUnit();
unsigned NumErrors = 0;
const auto Form = AttrValue.Value.getForm();
@@ -667,51 +689,15 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
}
break;
}
- case DW_FORM_strp: {
- auto SecOffset = AttrValue.Value.getAsSectionOffset();
- assert(SecOffset); // DW_FORM_strp is a section offset.
- if (SecOffset && *SecOffset >= DObj.getStrSection().size()) {
- ++NumErrors;
- error() << "DW_FORM_strp offset beyond .debug_str bounds:\n";
- dump(Die) << '\n';
- }
- break;
- }
+ case DW_FORM_strp:
case DW_FORM_strx:
case DW_FORM_strx1:
case DW_FORM_strx2:
case DW_FORM_strx3:
case DW_FORM_strx4: {
- auto Index = AttrValue.Value.getRawUValue();
- auto DieCU = Die.getDwarfUnit();
- // Check that we have a valid DWARF v5 string offsets table.
- if (!DieCU->getStringOffsetsTableContribution()) {
- ++NumErrors;
- error() << FormEncodingString(Form)
- << " used without a valid string offsets table:\n";
- dump(Die) << '\n';
- break;
- }
- // Check that the index is within the bounds of the section.
- unsigned ItemSize = DieCU->getDwarfStringOffsetsByteSize();
- // Use a 64-bit type to calculate the offset to guard against overflow.
- uint64_t Offset =
- (uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize;
- if (DObj.getStrOffsetsSection().Data.size() < Offset + ItemSize) {
- ++NumErrors;
- error() << FormEncodingString(Form) << " uses index "
- << format("%" PRIu64, Index) << ", which is too large:\n";
- dump(Die) << '\n';
- break;
- }
- // Check that the string offset is valid.
- uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index);
- if (StringOffset >= DObj.getStrSection().size()) {
+ if (Error E = AttrValue.Value.getAsCString().takeError()) {
++NumErrors;
- error() << FormEncodingString(Form) << " uses index "
- << format("%" PRIu64, Index)
- << ", but the referenced string"
- " offset is beyond .debug_str bounds:\n";
+ error() << toString(std::move(E)) << ":\n";
dump(Die) << '\n';
}
break;
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index b2c43b893cd3..6eef6f84ab40 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -531,7 +531,7 @@ llvm::Error DwarfTransformer::verify(StringRef GsymPath) {
<< LR->Locations.size() << "\n";
Log << " " << NumDwarfInlineInfos << " DWARF frames:\n";
for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
- const auto dii = DwarfInlineInfos.getFrame(Idx);
+ const auto &dii = DwarfInlineInfos.getFrame(Idx);
Log << " [" << Idx << "]: " << dii.FunctionName << " @ "
<< dii.FileName << ':' << dii.Line << '\n';
}
@@ -551,7 +551,7 @@ llvm::Error DwarfTransformer::verify(StringRef GsymPath) {
++Idx) {
const auto &gii = LR->Locations[Idx];
if (Idx < NumDwarfInlineInfos) {
- const auto dii = DwarfInlineInfos.getFrame(Idx);
+ const auto &dii = DwarfInlineInfos.getFrame(Idx);
gsymFilename = LR->getSourceFile(Idx);
// Verify function name
if (dii.FunctionName.find(gii.Name.str()) != 0)
diff --git a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
index 1a92e2cb7754..f9a763d724a8 100644
--- a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -343,15 +343,25 @@ Expected<FileBufferByteStream> MSFBuilder::commit(StringRef Path,
Layout = std::move(*L);
uint64_t FileSize = uint64_t(Layout.SB->BlockSize) * Layout.SB->NumBlocks;
- if (FileSize > UINT32_MAX) {
- // FIXME: Changing the BinaryStream classes to use 64-bit numbers lets
- // us create PDBs larger than 4 GiB successfully. The file format is
- // block-based and as long as each stream is small enough, PDBs larger than
- // 4 GiB might work. Check if tools can handle these large PDBs, and if so
- // add support for writing them.
+ // Ensure that the file size is under the limit for the specified block size.
+ if (FileSize > getMaxFileSizeFromBlockSize(Layout.SB->BlockSize)) {
+ msf_error_code error_code = [](uint32_t BlockSize) {
+ switch (BlockSize) {
+ case 8192:
+ return msf_error_code::size_overflow_8192;
+ case 16384:
+ return msf_error_code::size_overflow_16384;
+ case 32768:
+ return msf_error_code::size_overflow_32768;
+ default:
+ return msf_error_code::size_overflow_4096;
+ }
+ }(Layout.SB->BlockSize);
+
return make_error<MSFError>(
- msf_error_code::size_overflow,
- formatv("File size would have been {0,1:N}", FileSize));
+ error_code,
+ formatv("File size {0,1:N} too large for current PDB page size {1}",
+ FileSize, Layout.SB->BlockSize));
}
auto OutFileOrError = FileOutputBuffer::create(Path, FileSize);
diff --git a/llvm/lib/DebugInfo/MSF/MSFError.cpp b/llvm/lib/DebugInfo/MSF/MSFError.cpp
index e42157e9d48e..9df2158423a4 100644
--- a/llvm/lib/DebugInfo/MSF/MSFError.cpp
+++ b/llvm/lib/DebugInfo/MSF/MSFError.cpp
@@ -28,8 +28,14 @@ public:
case msf_error_code::insufficient_buffer:
return "The buffer is not large enough to read the requested number of "
"bytes.";
- case msf_error_code::size_overflow:
+ case msf_error_code::size_overflow_4096:
return "Output data is larger than 4 GiB.";
+ case msf_error_code::size_overflow_8192:
+ return "Output data is larger than 8 GiB.";
+ case msf_error_code::size_overflow_16384:
+ return "Output data is larger than 16 GiB.";
+ case msf_error_code::size_overflow_32768:
+ return "Output data is larger than 32 GiB.";
case msf_error_code::not_writable:
return "The specified stream is not writable.";
case msf_error_code::no_stream:
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
index cde645236851..5c61530c470d 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -100,7 +100,7 @@ PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
return ContainerLayout.StreamMap[StreamIndex];
}
-uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
+uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); }
Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
uint32_t NumBytes) const {
diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
index fd9a0deb54d6..f9e67014477e 100644
--- a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
@@ -518,8 +518,8 @@ SymbolCache::findLineTable(uint16_t Modi) const {
const std::vector<LineTableEntry> &RHS) {
return LHS[0].Addr < RHS[0].Addr;
});
- for (size_t I = 0; I < EntryList.size(); ++I)
- llvm::append_range(ModuleLineTable, EntryList[I]);
+ for (std::vector<LineTableEntry> &I : EntryList)
+ llvm::append_range(ModuleLineTable, I);
return ModuleLineTable;
}
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index f3f09584fdc9..5ec79df17fed 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -20,6 +20,7 @@
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
+#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
@@ -384,7 +385,14 @@ bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory,
}
}
}
- return false;
+ // Try debuginfod client cache and known servers.
+ Expected<std::string> PathOrErr = getCachedOrDownloadDebuginfo(BuildID);
+ if (!PathOrErr) {
+ consumeError(PathOrErr.takeError());
+ return false;
+ }
+ Result = *PathOrErr;
+ return true;
}
} // end anonymous namespace
diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
new file mode 100644
index 000000000000..389b18fd62ac
--- /dev/null
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -0,0 +1,183 @@
+//===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// This file defines the fetchInfo function, which retrieves
+/// any of the three supported artifact types: (executable, debuginfo, source
+/// file) associated with a build-id from debuginfod servers. If a source file
+/// is to be fetched, its absolute path must be specified in the Description
+/// argument to fetchInfo.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debuginfod/Debuginfod.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Support/CachePruning.h"
+#include "llvm/Support/Caching.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/xxhash.h"
+
+namespace llvm {
+static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
+
+// Returns a binary BuildID as a normalized hex string.
+// Uses lowercase for compatibility with common debuginfod servers.
+static std::string buildIDToString(BuildIDRef ID) {
+ return llvm::toHex(ID, /*LowerCase=*/true);
+}
+
+Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() {
+ const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
+ if (DebuginfodUrlsEnv == NULL)
+ return SmallVector<StringRef>();
+
+ SmallVector<StringRef> DebuginfodUrls;
+ StringRef(DebuginfodUrlsEnv).split(DebuginfodUrls, " ");
+ return DebuginfodUrls;
+}
+
+Expected<std::string> getDefaultDebuginfodCacheDirectory() {
+ if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
+ return CacheDirectoryEnv;
+
+ SmallString<64> CacheDirectory;
+ if (!sys::path::cache_directory(CacheDirectory))
+ return createStringError(
+ errc::io_error, "Unable to determine appropriate cache directory.");
+ return std::string(CacheDirectory);
+}
+
+std::chrono::milliseconds getDefaultDebuginfodTimeout() {
+ long Timeout;
+ const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
+ if (DebuginfodTimeoutEnv &&
+ to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
+ return std::chrono::milliseconds(Timeout * 1000);
+
+ return std::chrono::milliseconds(90 * 1000);
+}
+
+/// The following functions fetch a debuginfod artifact to a file in a local
+/// cache and return the cached file path. They first search the local cache,
+/// followed by the debuginfod servers.
+
+Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
+ StringRef SourceFilePath) {
+ SmallString<64> UrlPath;
+ sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
+ buildIDToString(ID), "source",
+ sys::path::convert_to_slash(SourceFilePath));
+ return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
+}
+
+Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
+ SmallString<64> UrlPath;
+ sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
+ buildIDToString(ID), "executable");
+ return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
+}
+
+Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
+ SmallString<64> UrlPath;
+ sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
+ buildIDToString(ID), "debuginfo");
+ return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
+}
+
+// General fetching function.
+Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
+ StringRef UrlPath) {
+ SmallString<10> CacheDir;
+
+ Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
+ if (!CacheDirOrErr)
+ return CacheDirOrErr.takeError();
+ CacheDir = *CacheDirOrErr;
+
+ Expected<SmallVector<StringRef>> DebuginfodUrlsOrErr =
+ getDefaultDebuginfodUrls();
+ if (!DebuginfodUrlsOrErr)
+ return DebuginfodUrlsOrErr.takeError();
+ SmallVector<StringRef> &DebuginfodUrls = *DebuginfodUrlsOrErr;
+ return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
+ DebuginfodUrls,
+ getDefaultDebuginfodTimeout());
+}
+
+Expected<std::string> getCachedOrDownloadArtifact(
+ StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
+ ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
+ SmallString<64> AbsCachedArtifactPath;
+ sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
+ "llvmcache-" + UniqueKey);
+
+ Expected<FileCache> CacheOrErr =
+ localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
+ if (!CacheOrErr)
+ return CacheOrErr.takeError();
+
+ FileCache Cache = *CacheOrErr;
+ // We choose an arbitrary Task parameter as we do not make use of it.
+ unsigned Task = 0;
+ Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey);
+ if (!CacheAddStreamOrErr)
+ return CacheAddStreamOrErr.takeError();
+ AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
+ if (!CacheAddStream)
+ return std::string(AbsCachedArtifactPath);
+ // The artifact was not found in the local cache, query the debuginfod
+ // servers.
+ if (!HTTPClient::isAvailable())
+ return createStringError(errc::io_error,
+ "No working HTTP client is available.");
+
+ if (!HTTPClient::IsInitialized)
+ return createStringError(
+ errc::io_error,
+ "A working HTTP client is available, but it is not initialized. To "
+ "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
+ "at the beginning of main.");
+
+ HTTPClient Client;
+ Client.setTimeout(Timeout);
+ for (StringRef ServerUrl : DebuginfodUrls) {
+ SmallString<64> ArtifactUrl;
+ sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
+
+ Expected<HTTPResponseBuffer> ResponseOrErr = Client.get(ArtifactUrl);
+ if (!ResponseOrErr)
+ return ResponseOrErr.takeError();
+
+ HTTPResponseBuffer &Response = *ResponseOrErr;
+ if (Response.Code != 200)
+ continue;
+
+ // We have retrieved the artifact from this server, and now add it to the
+ // file cache.
+ Expected<std::unique_ptr<CachedFileStream>> FileStreamOrErr =
+ CacheAddStream(Task);
+ if (!FileStreamOrErr)
+ return FileStreamOrErr.takeError();
+ std::unique_ptr<CachedFileStream> &FileStream = *FileStreamOrErr;
+ if (!Response.Body)
+ return createStringError(
+ errc::io_error, "Unallocated MemoryBuffer in HTTPResponseBuffer.");
+
+ *FileStream->OS << StringRef(Response.Body->getBufferStart(),
+ Response.Body->getBufferSize());
+
+ // Return the path to the artifact on disk.
+ return std::string(AbsCachedArtifactPath);
+ }
+
+ return createStringError(errc::argument_out_of_domain, "build id not found");
+}
+} // namespace llvm
diff --git a/llvm/lib/Debuginfod/HTTPClient.cpp b/llvm/lib/Debuginfod/HTTPClient.cpp
new file mode 100644
index 000000000000..65f457933b92
--- /dev/null
+++ b/llvm/lib/Debuginfod/HTTPClient.cpp
@@ -0,0 +1,216 @@
+//===-- llvm/Debuginfod/HTTPClient.cpp - HTTP client library ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// This file defines the methods of the HTTPRequest, HTTPClient, and
+/// BufferedHTTPResponseHandler classes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#ifdef LLVM_ENABLE_CURL
+#include <curl/curl.h>
+#endif
+
+using namespace llvm;
+
+HTTPRequest::HTTPRequest(StringRef Url) { this->Url = Url.str(); }
+
+bool operator==(const HTTPRequest &A, const HTTPRequest &B) {
+ return A.Url == B.Url && A.Method == B.Method &&
+ A.FollowRedirects == B.FollowRedirects;
+}
+
+HTTPResponseHandler::~HTTPResponseHandler() = default;
+
+static inline bool parseContentLengthHeader(StringRef LineRef,
+ size_t &ContentLength) {
+ // Content-Length is a mandatory header, and the only one we handle.
+ return LineRef.consume_front("Content-Length: ") &&
+ to_integer(LineRef.trim(), ContentLength, 10);
+}
+
+Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) {
+ if (ResponseBuffer.Body)
+ return Error::success();
+
+ size_t ContentLength;
+ if (parseContentLengthHeader(HeaderLine, ContentLength))
+ ResponseBuffer.Body =
+ WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength);
+
+ return Error::success();
+}
+
+Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
+ if (!ResponseBuffer.Body)
+ return createStringError(errc::io_error,
+ "Unallocated response buffer. HTTP Body data "
+ "received before Content-Length header.");
+ if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize())
+ return createStringError(errc::io_error,
+ "Content size exceeds buffer size.");
+ memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(),
+ BodyChunk.size());
+ Offset += BodyChunk.size();
+ return Error::success();
+}
+
+Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) {
+ ResponseBuffer.Code = Code;
+ return Error::success();
+}
+
+bool HTTPClient::IsInitialized = false;
+
+class HTTPClientCleanup {
+public:
+ ~HTTPClientCleanup() { HTTPClient::cleanup(); }
+};
+static const HTTPClientCleanup Cleanup;
+
+Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) {
+ BufferedHTTPResponseHandler Handler;
+ if (Error Err = perform(Request, Handler))
+ return std::move(Err);
+ return std::move(Handler.ResponseBuffer);
+}
+
+Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) {
+ HTTPRequest Request(Url);
+ return perform(Request);
+}
+
+#ifdef LLVM_ENABLE_CURL
+
+bool HTTPClient::isAvailable() { return true; }
+
+void HTTPClient::initialize() {
+ if (!IsInitialized) {
+ curl_global_init(CURL_GLOBAL_ALL);
+ IsInitialized = true;
+ }
+}
+
+void HTTPClient::cleanup() {
+ if (IsInitialized) {
+ curl_global_cleanup();
+ IsInitialized = false;
+ }
+}
+
+void HTTPClient::setTimeout(std::chrono::milliseconds Timeout) {
+ if (Timeout < std::chrono::milliseconds(0))
+ Timeout = std::chrono::milliseconds(0);
+ curl_easy_setopt(Curl, CURLOPT_TIMEOUT_MS, Timeout.count());
+}
+
+/// CurlHTTPRequest and the curl{Header,Write}Function are implementation
+/// details used to work with Curl. Curl makes callbacks with a single
+/// customizable pointer parameter.
+struct CurlHTTPRequest {
+ CurlHTTPRequest(HTTPResponseHandler &Handler) : Handler(Handler) {}
+ void storeError(Error Err) {
+ ErrorState = joinErrors(std::move(Err), std::move(ErrorState));
+ }
+ HTTPResponseHandler &Handler;
+ llvm::Error ErrorState = Error::success();
+};
+
+static size_t curlHeaderFunction(char *Contents, size_t Size, size_t NMemb,
+ CurlHTTPRequest *CurlRequest) {
+ assert(Size == 1 && "The Size passed by libCURL to CURLOPT_HEADERFUNCTION "
+ "should always be 1.");
+ if (Error Err =
+ CurlRequest->Handler.handleHeaderLine(StringRef(Contents, NMemb))) {
+ CurlRequest->storeError(std::move(Err));
+ return 0;
+ }
+ return NMemb;
+}
+
+static size_t curlWriteFunction(char *Contents, size_t Size, size_t NMemb,
+ CurlHTTPRequest *CurlRequest) {
+ Size *= NMemb;
+ if (Error Err =
+ CurlRequest->Handler.handleBodyChunk(StringRef(Contents, Size))) {
+ CurlRequest->storeError(std::move(Err));
+ return 0;
+ }
+ return Size;
+}
+
+HTTPClient::HTTPClient() {
+ assert(IsInitialized &&
+ "Must call HTTPClient::initialize() at the beginning of main().");
+ if (Curl)
+ return;
+ assert((Curl = curl_easy_init()) && "Curl could not be initialized.");
+ // Set the callback hooks.
+ curl_easy_setopt(Curl, CURLOPT_WRITEFUNCTION, curlWriteFunction);
+ curl_easy_setopt(Curl, CURLOPT_HEADERFUNCTION, curlHeaderFunction);
+}
+
+HTTPClient::~HTTPClient() { curl_easy_cleanup(Curl); }
+
+Error HTTPClient::perform(const HTTPRequest &Request,
+ HTTPResponseHandler &Handler) {
+ if (Request.Method != HTTPMethod::GET)
+ return createStringError(errc::invalid_argument,
+ "Unsupported CURL request method.");
+
+ SmallString<128> Url = Request.Url;
+ curl_easy_setopt(Curl, CURLOPT_URL, Url.c_str());
+ curl_easy_setopt(Curl, CURLOPT_FOLLOWLOCATION, Request.FollowRedirects);
+
+ CurlHTTPRequest CurlRequest(Handler);
+ curl_easy_setopt(Curl, CURLOPT_WRITEDATA, &CurlRequest);
+ curl_easy_setopt(Curl, CURLOPT_HEADERDATA, &CurlRequest);
+ CURLcode CurlRes = curl_easy_perform(Curl);
+ if (CurlRes != CURLE_OK)
+ return joinErrors(std::move(CurlRequest.ErrorState),
+ createStringError(errc::io_error,
+ "curl_easy_perform() failed: %s\n",
+ curl_easy_strerror(CurlRes)));
+ if (CurlRequest.ErrorState)
+ return std::move(CurlRequest.ErrorState);
+
+ unsigned Code;
+ curl_easy_getinfo(Curl, CURLINFO_RESPONSE_CODE, &Code);
+ if (Error Err = Handler.handleStatusCode(Code))
+ return joinErrors(std::move(CurlRequest.ErrorState), std::move(Err));
+
+ return std::move(CurlRequest.ErrorState);
+}
+
+#else
+
+HTTPClient::HTTPClient() = default;
+
+HTTPClient::~HTTPClient() = default;
+
+bool HTTPClient::isAvailable() { return false; }
+
+void HTTPClient::initialize() {}
+
+void HTTPClient::cleanup() {}
+
+void HTTPClient::setTimeout(std::chrono::milliseconds Timeout) {}
+
+Error HTTPClient::perform(const HTTPRequest &Request,
+ HTTPResponseHandler &Handler) {
+ llvm_unreachable("No HTTP Client implementation available.");
+}
+
+#endif
diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
index f380aa90035e..0cefbd63a7ae 100644
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -242,11 +242,77 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
// TODO: Parse template instances with a length prefix.
+ // There can be multiple different declarations in the same function that
+ // have the same mangled name. To make the mangled names unique, a fake
+ // parent in the form `__Sddd' is added to the symbol.
+ if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
+ const char *NumPtr = Mangled + 3;
+ while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
+ ++NumPtr;
+
+ if (Mangled + Len == NumPtr) {
+ // Skip over the fake parent.
+ Mangled += Len;
+ return parseIdentifier(Demangled, Mangled);
+ }
+
+ // Else demangle it as a plain identifier.
+ }
+
return parseLName(Demangled, Mangled, Len);
}
const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
unsigned long Len) {
+ switch (Len) {
+ case 6:
+ if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
+ // The static initializer for a given symbol.
+ Demangled->prepend("initializer for ");
+ Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+ Mangled += Len;
+ return Mangled;
+ }
+ if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
+ // The vtable symbol for a given class.
+ Demangled->prepend("vtable for ");
+ Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+ Mangled += Len;
+ return Mangled;
+ }
+ break;
+
+ case 7:
+ if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
+ // The classinfo symbol for a given class.
+ Demangled->prepend("ClassInfo for ");
+ Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+ Mangled += Len;
+ return Mangled;
+ }
+ break;
+
+ case 11:
+ if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
+ // The interface symbol for a given class.
+ Demangled->prepend("Interface for ");
+ Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+ Mangled += Len;
+ return Mangled;
+ }
+ break;
+
+ case 12:
+ if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
+ // The ModuleInfo symbol for a given module.
+ Demangled->prepend("ModuleInfo for ");
+ Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
+ Mangled += Len;
+ return Mangled;
+ }
+ break;
+ }
+
*Demangled << StringView(Mangled, Len);
Mangled += Len;
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
index 3ea9ffee6554..27d8833ae19e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
@@ -241,7 +241,9 @@ private:
}
case Branch32: {
Kind = x86_64::BranchPCRel32;
- Addend = 0;
+ // BranchPCRel32 implicitly handles the '-4' PC adjustment, so we have to
+ // adjust the addend by '+4' to compensate.
+ Addend += 4;
break;
}
}
@@ -252,7 +254,7 @@ private:
Edge GE(Kind, Offset, *GraphSymbol, Addend);
LLVM_DEBUG({
dbgs() << " ";
- printEdge(dbgs(), *BlockToFix, GE, getELFX86RelocationKindName(Kind));
+ printEdge(dbgs(), *BlockToFix, GE, x86_64::getEdgeKindName(Kind));
dbgs() << "\n";
});
diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 200f42aec067..ed912280ac82 100644
--- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -170,8 +170,8 @@ std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) {
PM.run(*M);
// Flush the output buffer to get the generated code into memory
- std::unique_ptr<MemoryBuffer> CompiledObjBuffer(
- new SmallVectorMemoryBuffer(std::move(ObjBufferSV)));
+ auto CompiledObjBuffer = std::make_unique<SmallVectorMemoryBuffer>(
+ std::move(ObjBufferSV), /*RequiresNullTerminator=*/false);
// If we have an object cache, tell it about the new object.
// Note that we're using the compiled image, not the loaded image (as below).
diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index 52e7eda90310..a5dd420c9132 100644
--- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -151,12 +151,8 @@ class MCJIT : public ExecutionEngine {
}
void markAllLoadedModulesAsFinalized() {
- for (ModulePtrSet::iterator I = LoadedModules.begin(),
- E = LoadedModules.end();
- I != E; ++I) {
- Module *M = *I;
+ for (Module *M : LoadedModules)
FinalizedModules.insert(M);
- }
LoadedModules.clear();
}
@@ -167,10 +163,8 @@ class MCJIT : public ExecutionEngine {
void freeModulePtrSet(ModulePtrSet& MPS) {
// Go through the module set and delete everything.
- for (ModulePtrSet::iterator I = MPS.begin(), E = MPS.end(); I != E; ++I) {
- Module *M = *I;
+ for (Module *M : MPS)
delete M;
- }
MPS.clear();
}
};
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 9ff6cec8c6c5..e2a0cadb6348 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -78,11 +78,10 @@ public:
: IRMaterializationUnit(ES, MO, std::move(TSM)), Parent(Parent) {}
PartitioningIRMaterializationUnit(
- ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags,
- SymbolStringPtr InitSymbol, SymbolNameToDefinitionMap SymbolToDefinition,
+ ThreadSafeModule TSM, Interface I,
+ SymbolNameToDefinitionMap SymbolToDefinition,
CompileOnDemandLayer &Parent)
- : IRMaterializationUnit(std::move(TSM), std::move(SymbolFlags),
- std::move(InitSymbol),
+ : IRMaterializationUnit(std::move(TSM), std::move(I),
std::move(SymbolToDefinition)),
Parent(Parent) {}
@@ -298,7 +297,9 @@ void CompileOnDemandLayer::emitPartition(
if (GVsToExtract->empty()) {
if (auto Err =
R->replace(std::make_unique<PartitioningIRMaterializationUnit>(
- std::move(TSM), R->getSymbols(), R->getInitializerSymbol(),
+ std::move(TSM),
+ MaterializationUnit::Interface(R->getSymbols(),
+ R->getInitializerSymbol()),
std::move(Defs), *this))) {
getExecutionSession().reportError(std::move(Err));
R->failMaterialization();
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
index f8efed15edea..f34247005258 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -53,7 +53,8 @@ Expected<SimpleCompiler::CompileResult> SimpleCompiler::operator()(Module &M) {
}
auto ObjBuffer = std::make_unique<SmallVectorMemoryBuffer>(
- std::move(ObjBufferSV), M.getModuleIdentifier() + "-jitted-objectbuffer");
+ std::move(ObjBufferSV), M.getModuleIdentifier() + "-jitted-objectbuffer",
+ /*RequiresNullTerminator=*/false);
auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 56a97f83d915..aa82cf38c45d 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -243,8 +243,7 @@ void AsynchronousSymbolQuery::detach() {
AbsoluteSymbolsMaterializationUnit::AbsoluteSymbolsMaterializationUnit(
SymbolMap Symbols)
- : MaterializationUnit(extractFlags(Symbols), nullptr),
- Symbols(std::move(Symbols)) {}
+ : MaterializationUnit(extractFlags(Symbols)), Symbols(std::move(Symbols)) {}
StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
return "<Absolute Symbols>";
@@ -263,18 +262,18 @@ void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
Symbols.erase(Name);
}
-SymbolFlagsMap
+MaterializationUnit::Interface
AbsoluteSymbolsMaterializationUnit::extractFlags(const SymbolMap &Symbols) {
SymbolFlagsMap Flags;
for (const auto &KV : Symbols)
Flags[KV.first] = KV.second.getFlags();
- return Flags;
+ return MaterializationUnit::Interface(std::move(Flags), nullptr);
}
ReExportsMaterializationUnit::ReExportsMaterializationUnit(
JITDylib *SourceJD, JITDylibLookupFlags SourceJDLookupFlags,
SymbolAliasMap Aliases)
- : MaterializationUnit(extractFlags(Aliases), nullptr), SourceJD(SourceJD),
+ : MaterializationUnit(extractFlags(Aliases)), SourceJD(SourceJD),
SourceJDLookupFlags(SourceJDLookupFlags), Aliases(std::move(Aliases)) {}
StringRef ReExportsMaterializationUnit::getName() const {
@@ -456,13 +455,13 @@ void ReExportsMaterializationUnit::discard(const JITDylib &JD,
Aliases.erase(Name);
}
-SymbolFlagsMap
+MaterializationUnit::Interface
ReExportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
SymbolFlagsMap SymbolFlags;
for (auto &KV : Aliases)
SymbolFlags[KV.first] = KV.second.AliasFlags;
- return SymbolFlags;
+ return MaterializationUnit::Interface(std::move(SymbolFlags), nullptr);
}
Expected<SymbolAliasMap> buildSimpleReexportsAliasMap(JITDylib &SourceJD,
@@ -2492,10 +2491,19 @@ void ExecutionSession::OL_applyQueryPhase1(
}
}
- // If we get here then we've moved on to the next JITDylib.
- LLVM_DEBUG(dbgs() << "Phase 1 moving to next JITDylib.\n");
- ++IPLS->CurSearchOrderIndex;
- IPLS->NewJITDylib = true;
+ if (IPLS->DefGeneratorCandidates.empty() &&
+ IPLS->DefGeneratorNonCandidates.empty()) {
+ // Early out if there are no remaining symbols.
+ LLVM_DEBUG(dbgs() << "All symbols matched.\n");
+ IPLS->CurSearchOrderIndex = IPLS->SearchOrder.size();
+ break;
+ } else {
+ // If we get here then we've moved on to the next JITDylib with candidates
+ // remaining.
+ LLVM_DEBUG(dbgs() << "Phase 1 moving to next JITDylib.\n");
+ ++IPLS->CurSearchOrderIndex;
+ IPLS->NewJITDylib = true;
+ }
}
// Remove any weakly referenced candidates that could not be found/generated.
diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
index 8479495623b8..fe62138c790c 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
@@ -154,8 +154,24 @@ public:
}
DebugSecInfos.push_back({&Sec, Sec.getName().substr(0, SepPos),
Sec.getName().substr(SepPos + 1), 0, 0});
- } else
+ } else {
NonDebugSections.push_back(&Sec);
+
+ // If the first block in the section has a non-zero alignment offset
+ // then we need to add a padding block, since the section command in
+ // the header doesn't allow for aligment offsets.
+ SectionRange R(Sec);
+ if (!R.empty()) {
+ auto &FB = *R.getFirstBlock();
+ if (FB.getAlignmentOffset() != 0) {
+ auto Padding = G.allocateBuffer(FB.getAlignmentOffset());
+ memset(Padding.data(), 0, Padding.size());
+ G.createContentBlock(Sec, Padding,
+ FB.getAddress() - FB.getAlignmentOffset(),
+ FB.getAlignment(), 0);
+ }
+ }
+ }
}
// Create container block.
diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
index b17d196f01b6..eded54f4bfb3 100644
--- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
@@ -28,8 +28,8 @@ class DSOHandleMaterializationUnit : public MaterializationUnit {
public:
DSOHandleMaterializationUnit(ELFNixPlatform &ENP,
const SymbolStringPtr &DSOHandleSymbol)
- : MaterializationUnit(createDSOHandleSectionSymbols(ENP, DSOHandleSymbol),
- DSOHandleSymbol),
+ : MaterializationUnit(
+ createDSOHandleSectionInterface(ENP, DSOHandleSymbol)),
ENP(ENP) {}
StringRef getName() const override { return "DSOHandleMU"; }
@@ -70,12 +70,13 @@ public:
void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {}
private:
- static SymbolFlagsMap
- createDSOHandleSectionSymbols(ELFNixPlatform &ENP,
- const SymbolStringPtr &DSOHandleSymbol) {
+ static MaterializationUnit::Interface
+ createDSOHandleSectionInterface(ELFNixPlatform &ENP,
+ const SymbolStringPtr &DSOHandleSymbol) {
SymbolFlagsMap SymbolFlags;
SymbolFlags[DSOHandleSymbol] = JITSymbolFlags::Exported;
- return SymbolFlags;
+ return MaterializationUnit::Interface(std::move(SymbolFlags),
+ DSOHandleSymbol);
}
ArrayRef<char> getDSOHandleContent(size_t PointerSize) {
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 2ab9ed4f856b..ae2d47fb8c5e 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -8,6 +8,7 @@
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -269,25 +270,30 @@ Error DynamicLibrarySearchGenerator::tryToGenerate(
}
Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
-StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName) {
+StaticLibraryDefinitionGenerator::Load(
+ ObjectLayer &L, const char *FileName,
+ GetObjectFileInterface GetObjFileInterface) {
auto ArchiveBuffer = errorOrToExpected(MemoryBuffer::getFile(FileName));
if (!ArchiveBuffer)
return ArchiveBuffer.takeError();
- return Create(L, std::move(*ArchiveBuffer));
+ return Create(L, std::move(*ArchiveBuffer), std::move(GetObjFileInterface));
}
Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
-StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName,
- const Triple &TT) {
+StaticLibraryDefinitionGenerator::Load(
+ ObjectLayer &L, const char *FileName, const Triple &TT,
+ GetObjectFileInterface GetObjFileInterface) {
+
auto B = object::createBinary(FileName);
if (!B)
return B.takeError();
// If this is a regular archive then create an instance from it.
if (isa<object::Archive>(B->getBinary()))
- return Create(L, std::move(B->takeBinary().second));
+ return Create(L, std::move(B->takeBinary().second),
+ std::move(GetObjFileInterface));
// If this is a universal binary then search for a slice matching the given
// Triple.
@@ -309,7 +315,8 @@ StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName,
" .. " + formatv("{0:x}", Obj.getOffset() + Obj.getSize()) +
": " + SliceBuffer.getError().message(),
SliceBuffer.getError());
- return Create(L, std::move(*SliceBuffer));
+ return Create(L, std::move(*SliceBuffer),
+ std::move(GetObjFileInterface));
}
}
@@ -326,11 +333,13 @@ StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName,
Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
StaticLibraryDefinitionGenerator::Create(
- ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer) {
+ ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ GetObjectFileInterface GetObjFileInterface) {
Error Err = Error::success();
std::unique_ptr<StaticLibraryDefinitionGenerator> ADG(
- new StaticLibraryDefinitionGenerator(L, std::move(ArchiveBuffer), Err));
+ new StaticLibraryDefinitionGenerator(
+ L, std::move(ArchiveBuffer), std::move(GetObjFileInterface), Err));
if (Err)
return std::move(Err);
@@ -371,7 +380,12 @@ Error StaticLibraryDefinitionGenerator::tryToGenerate(
MemoryBufferRef ChildBufferRef(ChildBufferInfo.first,
ChildBufferInfo.second);
- if (auto Err = L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef, false)))
+ auto I = GetObjFileInterface(L.getExecutionSession(), ChildBufferRef);
+ if (!I)
+ return I.takeError();
+
+ if (auto Err = L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef, false),
+ std::move(*I)))
return Err;
}
@@ -379,9 +393,15 @@ Error StaticLibraryDefinitionGenerator::tryToGenerate(
}
StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator(
- ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer, Error &Err)
- : L(L), ArchiveBuffer(std::move(ArchiveBuffer)),
- Archive(std::make_unique<object::Archive>(*this->ArchiveBuffer, Err)) {}
+ ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ GetObjectFileInterface GetObjFileInterface, Error &Err)
+ : L(L), GetObjFileInterface(std::move(GetObjFileInterface)),
+ ArchiveBuffer(std::move(ArchiveBuffer)),
+ Archive(std::make_unique<object::Archive>(*this->ArchiveBuffer, Err)) {
+
+ if (!this->GetObjFileInterface)
+ this->GetObjFileInterface = getObjectFileInterface;
+}
} // End namespace orc.
} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index ee1630a2ffa8..f427271bb45d 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -31,8 +31,8 @@ public:
CompileCallbackMaterializationUnit(SymbolStringPtr Name,
CompileFunction Compile)
- : MaterializationUnit(SymbolFlagsMap({{Name, JITSymbolFlags::Exported}}),
- nullptr),
+ : MaterializationUnit(Interface(
+ SymbolFlagsMap({{Name, JITSymbolFlags::Exported}}), nullptr)),
Name(std::move(Name)), Compile(std::move(Compile)) {}
StringRef getName() const override { return "<Compile Callbacks>"; }
diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index 20dfba23bf10..adb8861793b1 100644
--- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -10,9 +10,8 @@
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
#include "llvm/IR/Constants.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "orc"
@@ -33,7 +32,7 @@ Error IRLayer::add(ResourceTrackerSP RT, ThreadSafeModule TSM) {
IRMaterializationUnit::IRMaterializationUnit(
ExecutionSession &ES, const IRSymbolMapper::ManglingOptions &MO,
ThreadSafeModule TSM)
- : MaterializationUnit(SymbolFlagsMap(), nullptr), TSM(std::move(TSM)) {
+ : MaterializationUnit(Interface()), TSM(std::move(TSM)) {
assert(this->TSM && "Module must not be null");
@@ -98,10 +97,10 @@ IRMaterializationUnit::IRMaterializationUnit(
}
IRMaterializationUnit::IRMaterializationUnit(
- ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags,
- SymbolStringPtr InitSymbol, SymbolNameToDefinitionMap SymbolToDefinition)
- : MaterializationUnit(std::move(SymbolFlags), std::move(InitSymbol)),
- TSM(std::move(TSM)), SymbolToDefinition(std::move(SymbolToDefinition)) {}
+ ThreadSafeModule TSM, Interface I,
+ SymbolNameToDefinitionMap SymbolToDefinition)
+ : MaterializationUnit(std::move(I)), TSM(std::move(TSM)),
+ SymbolToDefinition(std::move(SymbolToDefinition)) {}
StringRef IRMaterializationUnit::getName() const {
if (TSM)
@@ -161,37 +160,47 @@ ObjectLayer::ObjectLayer(ExecutionSession &ES) : ES(ES) {}
ObjectLayer::~ObjectLayer() {}
-Error ObjectLayer::add(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> O) {
+Error ObjectLayer::add(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> O,
+ MaterializationUnit::Interface I) {
assert(RT && "RT can not be null");
- auto ObjMU = BasicObjectLayerMaterializationUnit::Create(*this, std::move(O));
- if (!ObjMU)
- return ObjMU.takeError();
auto &JD = RT->getJITDylib();
- return JD.define(std::move(*ObjMU), std::move(RT));
+ return JD.define(std::make_unique<BasicObjectLayerMaterializationUnit>(
+ *this, std::move(O), std::move(I)),
+ std::move(RT));
+}
+
+Error ObjectLayer::add(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> O) {
+ auto I = getObjectFileInterface(getExecutionSession(), O->getMemBufferRef());
+ if (!I)
+ return I.takeError();
+ return add(std::move(RT), std::move(O), std::move(*I));
+}
+
+Error ObjectLayer::add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O) {
+ auto I = getObjectFileInterface(getExecutionSession(), O->getMemBufferRef());
+ if (!I)
+ return I.takeError();
+ return add(JD, std::move(O), std::move(*I));
}
Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>>
BasicObjectLayerMaterializationUnit::Create(ObjectLayer &L,
std::unique_ptr<MemoryBuffer> O) {
- auto ObjSymInfo =
- getObjectSymbolInfo(L.getExecutionSession(), O->getMemBufferRef());
- if (!ObjSymInfo)
- return ObjSymInfo.takeError();
+ auto ObjInterface =
+ getObjectFileInterface(L.getExecutionSession(), O->getMemBufferRef());
- auto &SymbolFlags = ObjSymInfo->first;
- auto &InitSymbol = ObjSymInfo->second;
+ if (!ObjInterface)
+ return ObjInterface.takeError();
return std::unique_ptr<BasicObjectLayerMaterializationUnit>(
- new BasicObjectLayerMaterializationUnit(
- L, std::move(O), std::move(SymbolFlags), std::move(InitSymbol)));
+ new BasicObjectLayerMaterializationUnit(L, std::move(O),
+ std::move(*ObjInterface)));
}
BasicObjectLayerMaterializationUnit::BasicObjectLayerMaterializationUnit(
- ObjectLayer &L, std::unique_ptr<MemoryBuffer> O, SymbolFlagsMap SymbolFlags,
- SymbolStringPtr InitSymbol)
- : MaterializationUnit(std::move(SymbolFlags), std::move(InitSymbol)), L(L),
- O(std::move(O)) {}
+ ObjectLayer &L, std::unique_ptr<MemoryBuffer> O, Interface I)
+ : MaterializationUnit(std::move(I)), L(L), O(std::move(O)) {}
StringRef BasicObjectLayerMaterializationUnit::getName() const {
if (O)
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index e1f494415e86..66453e6a632f 100644
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -144,7 +144,7 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit(
LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager,
JITDylib &SourceJD, SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc)
- : MaterializationUnit(extractFlags(CallableAliases), nullptr),
+ : MaterializationUnit(extractFlags(CallableAliases)),
LCTManager(LCTManager), ISManager(ISManager), SourceJD(SourceJD),
CallableAliases(std::move(CallableAliases)), AliaseeTable(SrcJDLoc) {}
@@ -219,7 +219,7 @@ void LazyReexportsMaterializationUnit::discard(const JITDylib &JD,
CallableAliases.erase(Name);
}
-SymbolFlagsMap
+MaterializationUnit::Interface
LazyReexportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
SymbolFlagsMap SymbolFlags;
for (auto &KV : Aliases) {
@@ -227,7 +227,7 @@ LazyReexportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
"Lazy re-exports must be callable symbols");
SymbolFlags[KV.first] = KV.second.AliasFlags;
}
- return SymbolFlags;
+ return MaterializationUnit::Interface(std::move(SymbolFlags), nullptr);
}
} // End namespace orc.
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 46c915dfea9e..fb2e90e1c9c5 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -28,8 +28,7 @@ class MachOHeaderMaterializationUnit : public MaterializationUnit {
public:
MachOHeaderMaterializationUnit(MachOPlatform &MOP,
const SymbolStringPtr &HeaderStartSymbol)
- : MaterializationUnit(createHeaderSymbols(MOP, HeaderStartSymbol),
- HeaderStartSymbol),
+ : MaterializationUnit(createHeaderInterface(MOP, HeaderStartSymbol)),
MOP(MOP) {}
StringRef getName() const override { return "MachOHeaderMU"; }
@@ -110,9 +109,9 @@ private:
return G.createContentBlock(HeaderSection, HeaderContent, 0, 8, 0);
}
- static SymbolFlagsMap
- createHeaderSymbols(MachOPlatform &MOP,
- const SymbolStringPtr &HeaderStartSymbol) {
+ static MaterializationUnit::Interface
+ createHeaderInterface(MachOPlatform &MOP,
+ const SymbolStringPtr &HeaderStartSymbol) {
SymbolFlagsMap HeaderSymbolFlags;
HeaderSymbolFlags[HeaderStartSymbol] = JITSymbolFlags::Exported;
@@ -120,7 +119,8 @@ private:
HeaderSymbolFlags[MOP.getExecutionSession().intern(HS.Name)] =
JITSymbolFlags::Exported;
- return HeaderSymbolFlags;
+ return MaterializationUnit::Interface(std::move(HeaderSymbolFlags),
+ HeaderStartSymbol);
}
MachOPlatform &MOP;
diff --git a/llvm/lib/ExecutionEngine/Orc/Mangling.cpp b/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
index 7b21e6a684ca..9c243c9bf1d2 100644
--- a/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
@@ -7,13 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/Mangling.h"
-#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
-#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Mangler.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "orc"
@@ -85,188 +80,5 @@ void IRSymbolMapper::add(ExecutionSession &ES, const ManglingOptions &MO,
}
}
-static SymbolStringPtr addInitSymbol(SymbolFlagsMap &SymbolFlags,
- ExecutionSession &ES,
- StringRef ObjFileName) {
- SymbolStringPtr InitSymbol;
- size_t Counter = 0;
-
- do {
- std::string InitSymString;
- raw_string_ostream(InitSymString)
- << "$." << ObjFileName << ".__inits." << Counter++;
- InitSymbol = ES.intern(InitSymString);
- } while (SymbolFlags.count(InitSymbol));
-
- SymbolFlags[InitSymbol] = JITSymbolFlags::MaterializationSideEffectsOnly;
- return InitSymbol;
-}
-
-static Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
-getMachOObjectFileSymbolInfo(ExecutionSession &ES,
- const object::MachOObjectFile &Obj) {
- SymbolFlagsMap SymbolFlags;
-
- for (auto &Sym : Obj.symbols()) {
- Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
- if (!SymFlagsOrErr)
- // TODO: Test this error.
- return SymFlagsOrErr.takeError();
-
- // Skip symbols not defined in this object file.
- if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
- continue;
-
- // Skip symbols that are not global.
- if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
- continue;
-
- // Skip symbols that have type SF_File.
- if (auto SymType = Sym.getType()) {
- if (*SymType == object::SymbolRef::ST_File)
- continue;
- } else
- return SymType.takeError();
-
- auto Name = Sym.getName();
- if (!Name)
- return Name.takeError();
- auto InternedName = ES.intern(*Name);
- auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
- if (!SymFlags)
- return SymFlags.takeError();
-
- // Strip the 'exported' flag from MachO linker-private symbols.
- if (Name->startswith("l"))
- *SymFlags &= ~JITSymbolFlags::Exported;
-
- SymbolFlags[InternedName] = std::move(*SymFlags);
- }
-
- SymbolStringPtr InitSymbol;
- for (auto &Sec : Obj.sections()) {
- auto SecType = Obj.getSectionType(Sec);
- if ((SecType & MachO::SECTION_TYPE) == MachO::S_MOD_INIT_FUNC_POINTERS) {
- InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
- break;
- }
- auto SegName = Obj.getSectionFinalSegmentName(Sec.getRawDataRefImpl());
- auto SecName = cantFail(Obj.getSectionName(Sec.getRawDataRefImpl()));
- if (MachOPlatform::isInitializerSection(SegName, SecName)) {
- InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
- break;
- }
- }
-
- return std::make_pair(std::move(SymbolFlags), std::move(InitSymbol));
-}
-
-static Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
-getELFObjectFileSymbolInfo(ExecutionSession &ES,
- const object::ELFObjectFileBase &Obj) {
- SymbolFlagsMap SymbolFlags;
- for (auto &Sym : Obj.symbols()) {
- Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
- if (!SymFlagsOrErr)
- // TODO: Test this error.
- return SymFlagsOrErr.takeError();
-
- // Skip symbols not defined in this object file.
- if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
- continue;
-
- // Skip symbols that are not global.
- if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
- continue;
-
- // Skip symbols that have type SF_File.
- if (auto SymType = Sym.getType()) {
- if (*SymType == object::SymbolRef::ST_File)
- continue;
- } else
- return SymType.takeError();
-
- auto Name = Sym.getName();
- if (!Name)
- return Name.takeError();
- auto InternedName = ES.intern(*Name);
- auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
- if (!SymFlags)
- return SymFlags.takeError();
-
- // ELF STB_GNU_UNIQUE should map to Weak for ORC.
- if (Sym.getBinding() == ELF::STB_GNU_UNIQUE)
- *SymFlags |= JITSymbolFlags::Weak;
-
- SymbolFlags[InternedName] = std::move(*SymFlags);
- }
-
- SymbolStringPtr InitSymbol;
- for (auto &Sec : Obj.sections()) {
- if (auto SecName = Sec.getName()) {
- if (ELFNixPlatform::isInitializerSection(*SecName)) {
- InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
- break;
- }
- }
- }
-
- return std::make_pair(std::move(SymbolFlags), InitSymbol);
-}
-
-Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
-getGenericObjectFileSymbolInfo(ExecutionSession &ES,
- const object::ObjectFile &Obj) {
- SymbolFlagsMap SymbolFlags;
- for (auto &Sym : Obj.symbols()) {
- Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
- if (!SymFlagsOrErr)
- // TODO: Test this error.
- return SymFlagsOrErr.takeError();
-
- // Skip symbols not defined in this object file.
- if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
- continue;
-
- // Skip symbols that are not global.
- if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
- continue;
-
- // Skip symbols that have type SF_File.
- if (auto SymType = Sym.getType()) {
- if (*SymType == object::SymbolRef::ST_File)
- continue;
- } else
- return SymType.takeError();
-
- auto Name = Sym.getName();
- if (!Name)
- return Name.takeError();
- auto InternedName = ES.intern(*Name);
- auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
- if (!SymFlags)
- return SymFlags.takeError();
-
- SymbolFlags[InternedName] = std::move(*SymFlags);
- }
-
- return std::make_pair(std::move(SymbolFlags), nullptr);
-}
-
-Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
-getObjectSymbolInfo(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
- auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
-
- if (!Obj)
- return Obj.takeError();
-
- if (auto *MachOObj = dyn_cast<object::MachOObjectFile>(Obj->get()))
- return getMachOObjectFileSymbolInfo(ES, *MachOObj);
- else if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj->get()))
- return getELFObjectFileSymbolInfo(ES, *ELFObj);
-
- return getGenericObjectFileSymbolInfo(ES, **Obj);
-}
-
} // End namespace orc.
} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
new file mode 100644
index 000000000000..c1ad569dd65d
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
@@ -0,0 +1,205 @@
+//===------ ObjectFileInterface.cpp - MU interface utils for objects ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
+#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+void addInitSymbol(MaterializationUnit::Interface &I, ExecutionSession &ES,
+ StringRef ObjFileName) {
+ assert(!I.InitSymbol && "I already has an init symbol");
+ size_t Counter = 0;
+
+ do {
+ std::string InitSymString;
+ raw_string_ostream(InitSymString)
+ << "$." << ObjFileName << ".__inits." << Counter++;
+ I.InitSymbol = ES.intern(InitSymString);
+ } while (I.SymbolFlags.count(I.InitSymbol));
+
+ I.SymbolFlags[I.InitSymbol] = JITSymbolFlags::MaterializationSideEffectsOnly;
+}
+
+static Expected<MaterializationUnit::Interface>
+getMachOObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::MachOObjectFile &Obj) {
+ MaterializationUnit::Interface I;
+
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
+ continue;
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+
+ // Strip the 'exported' flag from MachO linker-private symbols.
+ if (Name->startswith("l"))
+ *SymFlags &= ~JITSymbolFlags::Exported;
+
+ I.SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ for (auto &Sec : Obj.sections()) {
+ auto SecType = Obj.getSectionType(Sec);
+ if ((SecType & MachO::SECTION_TYPE) == MachO::S_MOD_INIT_FUNC_POINTERS) {
+ addInitSymbol(I, ES, Obj.getFileName());
+ break;
+ }
+ auto SegName = Obj.getSectionFinalSegmentName(Sec.getRawDataRefImpl());
+ auto SecName = cantFail(Obj.getSectionName(Sec.getRawDataRefImpl()));
+ if (MachOPlatform::isInitializerSection(SegName, SecName)) {
+ addInitSymbol(I, ES, Obj.getFileName());
+ break;
+ }
+ }
+
+ return I;
+}
+
+static Expected<MaterializationUnit::Interface>
+getELFObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::ELFObjectFileBase &Obj) {
+ MaterializationUnit::Interface I;
+
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
+ continue;
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+
+ // ELF STB_GNU_UNIQUE should map to Weak for ORC.
+ if (Sym.getBinding() == ELF::STB_GNU_UNIQUE)
+ *SymFlags |= JITSymbolFlags::Weak;
+
+ I.SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ SymbolStringPtr InitSymbol;
+ for (auto &Sec : Obj.sections()) {
+ if (auto SecName = Sec.getName()) {
+ if (ELFNixPlatform::isInitializerSection(*SecName)) {
+ addInitSymbol(I, ES, Obj.getFileName());
+ break;
+ }
+ }
+ }
+
+ return I;
+}
+
+Expected<MaterializationUnit::Interface>
+getGenericObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::ObjectFile &Obj) {
+ MaterializationUnit::Interface I;
+
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
+ continue;
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+
+ I.SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ return I;
+}
+
+Expected<MaterializationUnit::Interface>
+getObjectFileInterface(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
+ auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
+
+ if (!Obj)
+ return Obj.takeError();
+
+ if (auto *MachOObj = dyn_cast<object::MachOObjectFile>(Obj->get()))
+ return getMachOObjectFileSymbolInfo(ES, *MachOObj);
+ else if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj->get()))
+ return getELFObjectFileSymbolInfo(ES, *ELFObj);
+
+ return getGenericObjectFileSymbolInfo(ES, **Obj);
+}
+
+} // End namespace orc.
+} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index 6f840a079dd1..0d6a33c5685e 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -23,12 +23,6 @@ using namespace llvm::orc;
namespace {
class LinkGraphMaterializationUnit : public MaterializationUnit {
-private:
- struct LinkGraphInterface {
- SymbolFlagsMap SymbolFlags;
- SymbolStringPtr InitSymbol;
- };
-
public:
static std::unique_ptr<LinkGraphMaterializationUnit>
Create(ObjectLinkingLayer &ObjLinkingLayer, std::unique_ptr<LinkGraph> G) {
@@ -44,9 +38,9 @@ public:
}
private:
- static LinkGraphInterface scanLinkGraph(ExecutionSession &ES, LinkGraph &G) {
+ static Interface scanLinkGraph(ExecutionSession &ES, LinkGraph &G) {
- LinkGraphInterface LGI;
+ Interface LGI;
for (auto *Sym : G.defined_symbols()) {
// Skip local symbols.
@@ -98,11 +92,9 @@ private:
}
LinkGraphMaterializationUnit(ObjectLinkingLayer &ObjLinkingLayer,
- std::unique_ptr<LinkGraph> G,
- LinkGraphInterface LGI)
- : MaterializationUnit(std::move(LGI.SymbolFlags),
- std::move(LGI.InitSymbol)),
- ObjLinkingLayer(ObjLinkingLayer), G(std::move(G)) {}
+ std::unique_ptr<LinkGraph> G, Interface LGI)
+ : MaterializationUnit(std::move(LGI)), ObjLinkingLayer(ObjLinkingLayer),
+ G(std::move(G)) {}
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
for (auto *Sym : G->defined_symbols())
@@ -257,7 +249,8 @@ public:
{
- // Check that InternedResult matches up with MR->getSymbols().
+ // Check that InternedResult matches up with MR->getSymbols(), overriding
+ // flags if requested.
// This guards against faulty transformations / compilers / object caches.
// First check that there aren't any missing symbols.
@@ -266,16 +259,20 @@ public:
SymbolNameVector MissingSymbols;
for (auto &KV : MR->getSymbols()) {
+ auto I = InternedResult.find(KV.first);
+
// If this is a materialization-side-effects only symbol then bump
// the counter and make sure it's *not* defined, otherwise make
// sure that it is defined.
if (KV.second.hasMaterializationSideEffectsOnly()) {
++NumMaterializationSideEffectsOnlySymbols;
- if (InternedResult.count(KV.first))
+ if (I != InternedResult.end())
ExtraSymbols.push_back(KV.first);
continue;
- } else if (!InternedResult.count(KV.first))
+ } else if (I == InternedResult.end())
MissingSymbols.push_back(KV.first);
+ else if (Layer.OverrideObjectFlags)
+ I->second.setFlags(KV.second);
}
// If there were missing symbols then report the error.
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index 673f7394450f..77a8f5af8ba0 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -192,8 +192,8 @@ public:
LLVMOrcMaterializationUnitMaterializeFunction Materialize,
LLVMOrcMaterializationUnitDiscardFunction Discard,
LLVMOrcMaterializationUnitDestroyFunction Destroy)
- : llvm::orc::MaterializationUnit(std::move(InitialSymbolFlags),
- std::move(InitSymbol)),
+ : llvm::orc::MaterializationUnit(
+ Interface(std::move(InitialSymbolFlags), std::move(InitSymbol))),
Name(std::move(Name)), Ctx(Ctx), Materialize(Materialize),
Discard(Discard), Destroy(Destroy) {}
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index f16c6bdbfa4f..3f38d26869d4 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -124,8 +124,10 @@ void RuntimeDyldImpl::resolveRelocations() {
std::lock_guard<sys::Mutex> locked(lock);
// Print out the sections prior to relocation.
- LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i)
- dumpSectionMemory(Sections[i], "before relocations"););
+ LLVM_DEBUG({
+ for (SectionEntry &S : Sections)
+ dumpSectionMemory(S, "before relocations");
+ });
// First, resolve relocations associated with external symbols.
if (auto Err = resolveExternalSymbols()) {
@@ -136,21 +138,23 @@ void RuntimeDyldImpl::resolveRelocations() {
resolveLocalRelocations();
// Print out sections after relocation.
- LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i)
- dumpSectionMemory(Sections[i], "after relocations"););
+ LLVM_DEBUG({
+ for (SectionEntry &S : Sections)
+ dumpSectionMemory(S, "after relocations");
+ });
}
void RuntimeDyldImpl::resolveLocalRelocations() {
// Iterate over all outstanding relocations
- for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) {
+ for (const auto &Rel : Relocations) {
// The Section here (Sections[i]) refers to the section in which the
// symbol for the relocation is located. The SectionID in the relocation
// entry provides the section to which the relocation will be applied.
- unsigned Idx = it->first;
+ unsigned Idx = Rel.first;
uint64_t Addr = getSectionLoadAddress(Idx);
LLVM_DEBUG(dbgs() << "Resolving relocations Section #" << Idx << "\t"
<< format("%p", (uintptr_t)Addr) << "\n");
- resolveRelocationList(it->second, Addr);
+ resolveRelocationList(Rel.second, Addr);
}
Relocations.clear();
}
@@ -457,9 +461,9 @@ static uint64_t
computeAllocationSizeForSections(std::vector<uint64_t> &SectionSizes,
uint64_t Alignment) {
uint64_t TotalSize = 0;
- for (size_t Idx = 0, Cnt = SectionSizes.size(); Idx < Cnt; Idx++) {
+ for (uint64_t SectionSize : SectionSizes) {
uint64_t AlignedSize =
- (SectionSizes[Idx] + Alignment - 1) / Alignment * Alignment;
+ (SectionSize + Alignment - 1) / Alignment * Alignment;
TotalSize += AlignedSize;
}
return TotalSize;
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 18f1a2314853..5157d51fd18c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -996,7 +996,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
AllocaIP = Builder.saveIP();
InsertPointTy AfterIP =
- applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
+ applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);
BasicBlock *LoopAfterBB = AfterIP.getBlock();
Instruction *SplitPos = LoopAfterBB->getTerminator();
if (!isa_and_nonnull<BranchInst>(SplitPos))
@@ -1156,7 +1156,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
Builder.SetInsertPoint(NonAtomicRedBlock);
for (auto En : enumerate(ReductionInfos)) {
const ReductionInfo &RI = En.value();
- Type *ValueType = RI.getElementType();
+ Type *ValueType = RI.ElementType;
Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
"red.value." + Twine(En.index()));
Value *PrivateRedValue =
@@ -1181,8 +1181,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
Builder.SetInsertPoint(AtomicRedBlock);
if (CanGenerateAtomic) {
for (const ReductionInfo &RI : ReductionInfos) {
- Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable,
- RI.PrivateVariable));
+ Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
+ RI.Variable, RI.PrivateVariable));
if (!Builder.GetInsertBlock())
return InsertPointTy();
}
@@ -1207,13 +1207,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
RedArrayTy, LHSArrayPtr, 0, En.index());
Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
- Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr);
+ Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
RedArrayTy, RHSArrayPtr, 0, En.index());
Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
Value *RHSPtr =
Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
- Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr);
+ Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
Value *Reduced;
Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
if (!Builder.GetInsertBlock())
@@ -1329,13 +1329,10 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
LoopInfos.emplace_front();
CanonicalLoopInfo *CL = &LoopInfos.front();
- CL->Preheader = Preheader;
CL->Header = Header;
CL->Cond = Cond;
- CL->Body = Body;
CL->Latch = Latch;
CL->Exit = Exit;
- CL->After = After;
#ifndef NDEBUG
CL->assertOK();
@@ -1359,7 +1356,7 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
// Split the loop at the insertion point: Branch to the preheader and move
// every following instruction to after the loop (the After BB). Also, the
// new successor is the loop's after block.
- Builder.CreateBr(CL->Preheader);
+ Builder.CreateBr(CL->getPreheader());
After->getInstList().splice(After->begin(), BB->getInstList(),
Builder.GetInsertPoint(), BB->end());
After->replaceSuccessorsPhiUsesWith(BB, After);
@@ -1791,6 +1788,12 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
BasicBlock *OrigAfter = Outermost->getAfter();
Function *F = OrigPreheader->getParent();
+ // Loop control blocks that may become orphaned later.
+ SmallVector<BasicBlock *, 12> OldControlBBs;
+ OldControlBBs.reserve(6 * Loops.size());
+ for (CanonicalLoopInfo *Loop : Loops)
+ Loop->collectControlBlocks(OldControlBBs);
+
// Setup the IRBuilder for inserting the trip count computation.
Builder.SetCurrentDebugLocation(DL);
if (ComputeIP.isSet())
@@ -1828,7 +1831,7 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Value *Leftover = Result->getIndVar();
SmallVector<Value *> NewIndVars;
- NewIndVars.set_size(NumLoops);
+ NewIndVars.resize(NumLoops);
for (int i = NumLoops - 1; i >= 1; --i) {
Value *OrigTripCount = Loops[i]->getTripCount();
@@ -1886,10 +1889,6 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
// Remove unused parts of the input loops.
- SmallVector<BasicBlock *, 12> OldControlBBs;
- OldControlBBs.reserve(6 * Loops.size());
- for (CanonicalLoopInfo *Loop : Loops)
- Loop->collectControlBlocks(OldControlBBs);
removeUnusedBlocksFromParent(OldControlBBs);
for (CanonicalLoopInfo *L : Loops)
@@ -1915,6 +1914,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
BasicBlock *InnerEnter = InnermostLoop->getBody();
BasicBlock *InnerLatch = InnermostLoop->getLatch();
+ // Loop control blocks that may become orphaned later.
+ SmallVector<BasicBlock *, 12> OldControlBBs;
+ OldControlBBs.reserve(6 * Loops.size());
+ for (CanonicalLoopInfo *Loop : Loops)
+ Loop->collectControlBlocks(OldControlBBs);
+
// Collect original trip counts and induction variable to be accessible by
// index. Also, the structure of the original loops is not preserved during
// the construction of the tiled loops, so do it before we scavenge the BBs of
@@ -2074,10 +2079,6 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
}
// Remove unused parts of the original loops.
- SmallVector<BasicBlock *, 12> OldControlBBs;
- OldControlBBs.reserve(6 * Loops.size());
- for (CanonicalLoopInfo *Loop : Loops)
- Loop->collectControlBlocks(OldControlBBs);
removeUnusedBlocksFromParent(OldControlBBs);
for (CanonicalLoopInfo *L : Loops)
@@ -3079,7 +3080,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
+ AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -3097,7 +3098,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
});
emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
- IsXLHSInRHSPart);
+ IsXBinopExpr);
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
return Builder.saveIP();
}
@@ -3134,13 +3135,13 @@ std::pair<Value *, Value *>
OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp,
- bool VolatileX, bool IsXLHSInRHSPart) {
+ bool VolatileX, bool IsXBinopExpr) {
Type *XElemTy = X->getType()->getPointerElementType();
bool DoCmpExch =
((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
(RMWOp == AtomicRMWInst::FSub) ||
- (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);
+ (RMWOp == AtomicRMWInst::Sub && !IsXBinopExpr);
std::pair<Value *, Value *> Res;
if (XElemTy->isIntegerTy() && !DoCmpExch) {
@@ -3232,7 +3233,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
- bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
+ bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -3251,9 +3252,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
// If UpdateExpr is 'x' updated with some `expr` not based on 'x',
// 'x' is simply atomically rewritten with 'expr'.
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
- std::pair<Value *, Value *> Result =
- emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
- X.IsVolatile, IsXLHSInRHSPart);
+ std::pair<Value *, Value *> Result = emitAtomicUpdate(
+ AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile, IsXBinopExpr);
Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
@@ -3321,7 +3321,16 @@ void CanonicalLoopInfo::collectControlBlocks(
// flow. For consistency, this also means we do not add the Body block, which
// is just the entry to the body code.
BBs.reserve(BBs.size() + 6);
- BBs.append({Preheader, Header, Cond, Latch, Exit, After});
+ BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
+}
+
+BasicBlock *CanonicalLoopInfo::getPreheader() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ for (BasicBlock *Pred : predecessors(Header)) {
+ if (Pred != Latch)
+ return Pred;
+ }
+ llvm_unreachable("Missing preheader");
}
void CanonicalLoopInfo::assertOK() const {
@@ -3330,6 +3339,10 @@ void CanonicalLoopInfo::assertOK() const {
if (!isValid())
return;
+ BasicBlock *Preheader = getPreheader();
+ BasicBlock *Body = getBody();
+ BasicBlock *After = getAfter();
+
// Verify standard control-flow we use for OpenMP loops.
assert(Preheader);
assert(isa<BranchInst>(Preheader->getTerminator()) &&
@@ -3415,11 +3428,8 @@ void CanonicalLoopInfo::assertOK() const {
}
void CanonicalLoopInfo::invalidate() {
- Preheader = nullptr;
Header = nullptr;
Cond = nullptr;
- Body = nullptr;
Latch = nullptr;
Exit = nullptr;
- After = nullptr;
}
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index c9748e1387eb..bbe0c97e60a2 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -512,10 +512,8 @@ void TypePrinting::incorporateTypes() {
// the unnamed ones out to a numbering and remove the anonymous structs.
unsigned NextNumber = 0;
- std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
- for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
- StructType *STy = *I;
-
+ std::vector<StructType *>::iterator NextToUse = NamedTypes.begin();
+ for (StructType *STy : NamedTypes) {
// Ignore anonymous types.
if (STy->isLiteral())
continue;
@@ -1450,6 +1448,12 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
+ if (const auto *NC = dyn_cast<NoCFIValue>(CV)) {
+ Out << "no_cfi ";
+ WriteAsOperandInternal(Out, NC->getGlobalValue(), WriterCtx);
+ return;
+ }
+
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
Type *ETy = CA->getType()->getElementType();
Out << '[';
@@ -1583,11 +1587,9 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << ", ";
}
- if (CE->hasIndices()) {
- ArrayRef<unsigned> Indices = CE->getIndices();
- for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- Out << ", " << Indices[i];
- }
+ if (CE->hasIndices())
+ for (unsigned I : CE->getIndices())
+ Out << ", " << I;
if (CE->isCast()) {
Out << " to ";
@@ -3528,8 +3530,8 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
}
maybePrintComdat(Out, *GV);
- if (GV->getAlignment())
- Out << ", align " << GV->getAlignment();
+ if (MaybeAlign A = GV->getAlign())
+ Out << ", align " << A->value();
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
GV->getAllMetadata(MDs);
@@ -3637,13 +3639,13 @@ void AssemblyWriter::printTypeIdentities() {
}
auto &NamedTypes = TypePrinter.getNamedTypes();
- for (unsigned I = 0, E = NamedTypes.size(); I != E; ++I) {
- PrintLLVMName(Out, NamedTypes[I]->getName(), LocalPrefix);
+ for (StructType *NamedType : NamedTypes) {
+ PrintLLVMName(Out, NamedType->getName(), LocalPrefix);
Out << " = type ";
// Make sure we print out at least one level of the type structure, so
// that we do not get %FILE = type %FILE
- TypePrinter.printStructBody(NamedTypes[I], Out);
+ TypePrinter.printStructBody(NamedType, Out);
Out << '\n';
}
}
@@ -3757,8 +3759,8 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << '"';
}
maybePrintComdat(Out, *F);
- if (F->getAlignment())
- Out << " align " << F->getAlignment();
+ if (MaybeAlign A = F->getAlign())
+ Out << " align " << A->value();
if (F->hasGC())
Out << " gc \"" << F->getGC() << '"';
if (F->hasPrefixData()) {
@@ -4239,8 +4241,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
writeOperand(AI->getArraySize(), true);
}
- if (AI->getAlignment()) {
- Out << ", align " << AI->getAlignment();
+ if (MaybeAlign A = AI->getAlign()) {
+ Out << ", align " << A->value();
}
unsigned AddrSpace = AI->getType()->getAddressSpace();
@@ -4310,13 +4312,13 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (LI->isAtomic())
writeAtomic(LI->getContext(), LI->getOrdering(), LI->getSyncScopeID());
- if (LI->getAlignment())
- Out << ", align " << LI->getAlignment();
+ if (MaybeAlign A = LI->getAlign())
+ Out << ", align " << A->value();
} else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (SI->isAtomic())
writeAtomic(SI->getContext(), SI->getOrdering(), SI->getSyncScopeID());
- if (SI->getAlignment())
- Out << ", align " << SI->getAlignment();
+ if (MaybeAlign A = SI->getAlign())
+ Out << ", align " << A->value();
} else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
writeAtomicCmpXchg(CXI->getContext(), CXI->getSuccessOrdering(),
CXI->getFailureOrdering(), CXI->getSyncScopeID());
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index c5bbe6571096..1153fb827b56 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -253,7 +253,8 @@ public:
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
- std::pair<unsigned, unsigned> getVScaleRangeArgs() const;
+ unsigned getVScaleRangeMin() const;
+ Optional<unsigned> getVScaleRangeMax() const;
std::string getAsString(bool InAttrGrp) const;
Type *getAttributeType(Attribute::AttrKind Kind) const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index f81a446d6e46..c899afae6cce 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -78,15 +78,18 @@ unpackAllocSizeArgs(uint64_t Num) {
return std::make_pair(ElemSizeArg, NumElemsArg);
}
-static uint64_t packVScaleRangeArgs(unsigned MinValue, unsigned MaxValue) {
- return uint64_t(MinValue) << 32 | MaxValue;
+static uint64_t packVScaleRangeArgs(unsigned MinValue,
+ Optional<unsigned> MaxValue) {
+ return uint64_t(MinValue) << 32 | MaxValue.getValueOr(0);
}
-static std::pair<unsigned, unsigned> unpackVScaleRangeArgs(uint64_t Value) {
+static std::pair<unsigned, Optional<unsigned>>
+unpackVScaleRangeArgs(uint64_t Value) {
unsigned MaxValue = Value & std::numeric_limits<unsigned>::max();
unsigned MinValue = Value >> 32;
- return std::make_pair(MinValue, MaxValue);
+ return std::make_pair(MinValue,
+ MaxValue > 0 ? MaxValue : Optional<unsigned>());
}
Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
@@ -354,10 +357,16 @@ std::pair<unsigned, Optional<unsigned>> Attribute::getAllocSizeArgs() const {
return unpackAllocSizeArgs(pImpl->getValueAsInt());
}
-std::pair<unsigned, unsigned> Attribute::getVScaleRangeArgs() const {
+unsigned Attribute::getVScaleRangeMin() const {
+ assert(hasAttribute(Attribute::VScaleRange) &&
+ "Trying to get vscale args from non-vscale attribute");
+ return unpackVScaleRangeArgs(pImpl->getValueAsInt()).first;
+}
+
+Optional<unsigned> Attribute::getVScaleRangeMax() const {
assert(hasAttribute(Attribute::VScaleRange) &&
"Trying to get vscale args from non-vscale attribute");
- return unpackVScaleRangeArgs(pImpl->getValueAsInt());
+ return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second;
}
std::string Attribute::getAsString(bool InAttrGrp) const {
@@ -428,13 +437,13 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
}
if (hasAttribute(Attribute::VScaleRange)) {
- unsigned MinValue, MaxValue;
- std::tie(MinValue, MaxValue) = getVScaleRangeArgs();
+ unsigned MinValue = getVScaleRangeMin();
+ Optional<unsigned> MaxValue = getVScaleRangeMax();
std::string Result = "vscale_range(";
Result += utostr(MinValue);
Result += ',';
- Result += utostr(MaxValue);
+ Result += utostr(MaxValue.getValueOr(0));
Result += ')';
return Result;
}
@@ -717,9 +726,12 @@ std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
: std::pair<unsigned, Optional<unsigned>>(0, 0);
}
-std::pair<unsigned, unsigned> AttributeSet::getVScaleRangeArgs() const {
- return SetNode ? SetNode->getVScaleRangeArgs()
- : std::pair<unsigned, unsigned>(0, 0);
+unsigned AttributeSet::getVScaleRangeMin() const {
+ return SetNode ? SetNode->getVScaleRangeMin() : 1;
+}
+
+Optional<unsigned> AttributeSet::getVScaleRangeMax() const {
+ return SetNode ? SetNode->getVScaleRangeMax() : None;
}
std::string AttributeSet::getAsString(bool InAttrGrp) const {
@@ -897,10 +909,16 @@ AttributeSetNode::getAllocSizeArgs() const {
return std::make_pair(0, 0);
}
-std::pair<unsigned, unsigned> AttributeSetNode::getVScaleRangeArgs() const {
+unsigned AttributeSetNode::getVScaleRangeMin() const {
if (auto A = findEnumAttribute(Attribute::VScaleRange))
- return A->getVScaleRangeArgs();
- return std::make_pair(0, 0);
+ return A->getVScaleRangeMin();
+ return 1;
+}
+
+Optional<unsigned> AttributeSetNode::getVScaleRangeMax() const {
+ if (auto A = findEnumAttribute(Attribute::VScaleRange))
+ return A->getVScaleRangeMax();
+ return None;
}
std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
@@ -1118,16 +1136,21 @@ AttributeList AttributeList::get(LLVMContext &C, AttributeSet FnAttrs,
}
AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) {
- if (!B.hasAttributes())
+ AttributeSet Attrs) {
+ if (!Attrs.hasAttributes())
return {};
Index = attrIdxToArrayIdx(Index);
SmallVector<AttributeSet, 8> AttrSets(Index + 1);
- AttrSets[Index] = AttributeSet::get(C, B);
+ AttrSets[Index] = Attrs;
return getImpl(C, AttrSets);
}
AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
+ const AttrBuilder &B) {
+ return get(C, Index, AttributeSet::get(C, B));
+}
+
+AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
ArrayRef<Attribute::AttrKind> Kinds) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
for (const auto K : Kinds)
@@ -1623,8 +1646,12 @@ std::pair<unsigned, Optional<unsigned>> AttrBuilder::getAllocSizeArgs() const {
return unpackAllocSizeArgs(getRawIntAttr(Attribute::AllocSize));
}
-std::pair<unsigned, unsigned> AttrBuilder::getVScaleRangeArgs() const {
- return unpackVScaleRangeArgs(getRawIntAttr(Attribute::VScaleRange));
+unsigned AttrBuilder::getVScaleRangeMin() const {
+ return unpackVScaleRangeArgs(getRawIntAttr(Attribute::VScaleRange)).first;
+}
+
+Optional<unsigned> AttrBuilder::getVScaleRangeMax() const {
+ return unpackVScaleRangeArgs(getRawIntAttr(Attribute::VScaleRange)).second;
}
AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) {
@@ -1669,7 +1696,7 @@ AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
}
AttrBuilder &AttrBuilder::addVScaleRangeAttr(unsigned MinValue,
- unsigned MaxValue) {
+ Optional<unsigned> MaxValue) {
return addVScaleRangeAttrFromRawRepr(packVScaleRangeArgs(MinValue, MaxValue));
}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d73d1e9c20b3..b8ad2b294b87 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -702,6 +702,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
return true;
}
+
+ if (Name == "arm.mve.vctp64" &&
+ cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
+ // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
+ // function and deal with it below in UpgradeIntrinsicCall.
+ rename(F);
+ return true;
+ }
+ // These too are changed to accept a v2i1 instead of the old v4i1.
+ if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
+ Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
+ Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
+ Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
+ Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
+ Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
+ Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
+ Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
+ Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
+ Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
+ Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
+ Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
+ Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
+ Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
+ return true;
+
break;
}
@@ -1803,6 +1828,96 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
}
}
+static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F,
+ IRBuilder<> &Builder) {
+ if (Name == "mve.vctp64.old") {
+ // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
+ // correct type.
+ Value *VCTP = Builder.CreateCall(
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
+ CI->getArgOperand(0), CI->getName());
+ Value *C1 = Builder.CreateCall(
+ Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::arm_mve_pred_v2i,
+ {VectorType::get(Builder.getInt1Ty(), 2, false)}),
+ VCTP);
+ return Builder.CreateCall(
+ Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::arm_mve_pred_i2v,
+ {VectorType::get(Builder.getInt1Ty(), 4, false)}),
+ C1);
+ } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
+ Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
+ Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
+ Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
+ Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
+ Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
+ Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
+ Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
+ Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
+ Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
+ Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
+ Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
+ Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
+ Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
+ std::vector<Type *> Tys;
+ unsigned ID = CI->getIntrinsicID();
+ Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
+ switch (ID) {
+ case Intrinsic::arm_mve_mull_int_predicated:
+ case Intrinsic::arm_mve_vqdmull_predicated:
+ case Intrinsic::arm_mve_vldr_gather_base_predicated:
+ Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
+ break;
+ case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
+ case Intrinsic::arm_mve_vstr_scatter_base_predicated:
+ case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
+ Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
+ V2I1Ty};
+ break;
+ case Intrinsic::arm_mve_vldr_gather_offset_predicated:
+ Tys = {CI->getType(), CI->getOperand(0)->getType(),
+ CI->getOperand(1)->getType(), V2I1Ty};
+ break;
+ case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
+ Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
+ CI->getOperand(2)->getType(), V2I1Ty};
+ break;
+ case Intrinsic::arm_cde_vcx1q_predicated:
+ case Intrinsic::arm_cde_vcx1qa_predicated:
+ case Intrinsic::arm_cde_vcx2q_predicated:
+ case Intrinsic::arm_cde_vcx2qa_predicated:
+ case Intrinsic::arm_cde_vcx3q_predicated:
+ case Intrinsic::arm_cde_vcx3qa_predicated:
+ Tys = {CI->getOperand(1)->getType(), V2I1Ty};
+ break;
+ default:
+ llvm_unreachable("Unhandled Intrinsic!");
+ }
+
+ std::vector<Value *> Ops;
+ for (Value *Op : CI->args()) {
+ Type *Ty = Op->getType();
+ if (Ty->getScalarSizeInBits() == 1) {
+ Value *C1 = Builder.CreateCall(
+ Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::arm_mve_pred_v2i,
+ {VectorType::get(Builder.getInt1Ty(), 4, false)}),
+ Op);
+ Op = Builder.CreateCall(
+ Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
+ C1);
+ }
+ Ops.push_back(Op);
+ }
+
+ Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+ return Builder.CreateCall(Fn, Ops, CI->getName());
+ }
+ llvm_unreachable("Unknown function for ARM CallInst upgrade.");
+}
+
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@@ -1826,6 +1941,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool IsNVVM = Name.startswith("nvvm.");
if (IsNVVM)
Name = Name.substr(5);
+ bool IsARM = Name.startswith("arm.");
+ if (IsARM)
+ Name = Name.substr(4);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
@@ -2289,14 +2407,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
- Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
- CI->getArgOperand(1), CI->getArgOperand(2),
- /*Aligned*/false);
- } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
- Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
- CI->getArgOperand(1),CI->getArgOperand(2),
- /*Aligned*/true);
+ } else if (IsX86 && Name.startswith("avx512.mask.load")) {
+ // "avx512.mask.loadu." or "avx512.mask.load."
+ bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
+ Rep =
+ UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), Aligned);
} else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
auto *ResultTy = cast<FixedVectorType>(CI->getType());
Type *PtrTy = ResultTy->getElementType();
@@ -3649,6 +3765,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
F->getParent(), Intrinsic::convert_from_fp16,
{Builder.getFloatTy()}),
CI->getArgOperand(0), "h2f");
+ } else if (IsARM) {
+ Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index ed1956e0f7e9..7beafc485d09 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -450,8 +450,8 @@ BasicBlock *BasicBlock::splitBasicBlockBefore(iterator I, const Twine &BBName) {
void BasicBlock::replacePhiUsesWith(BasicBlock *Old, BasicBlock *New) {
// N.B. This might not be a complete BasicBlock, so don't assume
// that it ends with a non-phi instruction.
- for (iterator II = begin(), IE = end(); II != IE; ++II) {
- PHINode *PN = dyn_cast<PHINode>(II);
+ for (Instruction &I : *this) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;
PN->replaceIncomingBlockWith(Old, New);
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 437fd0558447..8668fe82601c 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -1801,46 +1801,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
} else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
const APFloat &C1V = cast<ConstantFP>(C1)->getValueAPF();
const APFloat &C2V = cast<ConstantFP>(C2)->getValueAPF();
- APFloat::cmpResult R = C1V.compare(C2V);
- switch (pred) {
- default: llvm_unreachable("Invalid FCmp Predicate");
- case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
- case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
- case FCmpInst::FCMP_UNO:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered);
- case FCmpInst::FCMP_ORD:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered);
- case FCmpInst::FCMP_UEQ:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual);
- case FCmpInst::FCMP_OEQ:
- return ConstantInt::get(ResultTy, R==APFloat::cmpEqual);
- case FCmpInst::FCMP_UNE:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual);
- case FCmpInst::FCMP_ONE:
- return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
- R==APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_ULT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan);
- case FCmpInst::FCMP_OLT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan);
- case FCmpInst::FCMP_UGT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
- R==APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_OGT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_ULE:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_OLE:
- return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual);
- case FCmpInst::FCMP_UGE:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan);
- case FCmpInst::FCMP_OGE:
- return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual);
- }
+ CmpInst::Predicate Predicate = CmpInst::Predicate(pred);
+ return ConstantInt::get(ResultTy, FCmpInst::compare(C1V, C2V, Predicate));
} else if (auto *C1VTy = dyn_cast<VectorType>(C1->getType())) {
// Fast path for splatted constants.
@@ -2215,9 +2177,8 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
if (C->isNullValue()) {
bool isNull = true;
- for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
- if (!isa<UndefValue>(Idxs[i]) &&
- !cast<Constant>(Idxs[i])->isNullValue()) {
+ for (Value *Idx : Idxs)
+ if (!isa<UndefValue>(Idx) && !cast<Constant>(Idx)->isNullValue()) {
isNull = false;
break;
}
@@ -2233,8 +2194,8 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
// The GEP returns a vector of pointers when one of more of
// its arguments is a vector.
- for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
- if (auto *VT = dyn_cast<VectorType>(Idxs[i]->getType())) {
+ for (Value *Idx : Idxs) {
+ if (auto *VT = dyn_cast<VectorType>(Idx->getType())) {
assert((!isa<VectorType>(GEPTy) || isa<ScalableVectorType>(GEPTy) ==
isa<ScalableVectorType>(VT)) &&
"Mismatched GEPTy vector types");
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index c66cfb6e9ac1..837be910f6d8 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -535,6 +535,9 @@ void llvm::deleteConstant(Constant *C) {
case Constant::DSOLocalEquivalentVal:
delete static_cast<DSOLocalEquivalent *>(C);
break;
+ case Constant::NoCFIValueVal:
+ delete static_cast<NoCFIValue *>(C);
+ break;
case Constant::UndefValueVal:
delete static_cast<UndefValue *>(C);
break;
@@ -1296,9 +1299,10 @@ Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef<Constant*> V) {
if (V.empty())
return ConstantAggregateZero::get(Ty);
- for (unsigned i = 0, e = V.size(); i != e; ++i) {
- assert(V[i]->getType() == Ty->getElementType() &&
+ for (Constant *C : V) {
+ assert(C->getType() == Ty->getElementType() &&
"Wrong type in array element initializer");
+ (void)C;
}
// If this is an all-zero array, return a ConstantAggregateZero object. If
@@ -1364,12 +1368,12 @@ Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
isZero = V[0]->isNullValue();
// PoisonValue inherits UndefValue, so its check is not necessary.
if (isUndef || isZero) {
- for (unsigned i = 0, e = V.size(); i != e; ++i) {
- if (!V[i]->isNullValue())
+ for (Constant *C : V) {
+ if (!C->isNullValue())
isZero = false;
- if (!isa<PoisonValue>(V[i]))
+ if (!isa<PoisonValue>(C))
isPoison = false;
- if (isa<PoisonValue>(V[i]) || !isa<UndefValue>(V[i]))
+ if (isa<PoisonValue>(C) || !isa<UndefValue>(C))
isUndef = false;
}
}
@@ -1962,6 +1966,47 @@ Value *DSOLocalEquivalent::handleOperandChangeImpl(Value *From, Value *To) {
return nullptr;
}
+NoCFIValue *NoCFIValue::get(GlobalValue *GV) {
+ NoCFIValue *&NC = GV->getContext().pImpl->NoCFIValues[GV];
+ if (!NC)
+ NC = new NoCFIValue(GV);
+
+ assert(NC->getGlobalValue() == GV &&
+ "NoCFIValue does not match the expected global value");
+ return NC;
+}
+
+NoCFIValue::NoCFIValue(GlobalValue *GV)
+ : Constant(GV->getType(), Value::NoCFIValueVal, &Op<0>(), 1) {
+ setOperand(0, GV);
+}
+
+/// Remove the constant from the constant table.
+void NoCFIValue::destroyConstantImpl() {
+ const GlobalValue *GV = getGlobalValue();
+ GV->getContext().pImpl->NoCFIValues.erase(GV);
+}
+
+Value *NoCFIValue::handleOperandChangeImpl(Value *From, Value *To) {
+ assert(From == getGlobalValue() && "Changing value does not match operand.");
+
+ GlobalValue *GV = dyn_cast<GlobalValue>(To->stripPointerCasts());
+ assert(GV && "Can only replace the operands with a global value");
+
+ NoCFIValue *&NewNC = getContext().pImpl->NoCFIValues[GV];
+ if (NewNC)
+ return llvm::ConstantExpr::getBitCast(NewNC, getType());
+
+ getContext().pImpl->NoCFIValues.erase(getGlobalValue());
+ NewNC = this;
+ setOperand(0, GV);
+
+ if (GV->getType() != getType())
+ mutateType(GV->getType());
+
+ return nullptr;
+}
+
//---- ConstantExpr::get() implementations.
//
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 2c396ae97499..a263d2536541 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -1696,6 +1696,14 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
return wrap(ConstantExpr::getGetElementPtr(Ty, Val, IdxList));
}
+LLVMValueRef LLVMConstGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices) {
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
+ Constant *Val = unwrap<Constant>(ConstantVal);
+ return wrap(ConstantExpr::getGetElementPtr(unwrap(Ty), Val, IdxList));
+}
+
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices,
unsigned NumIndices) {
@@ -1707,6 +1715,15 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
return wrap(ConstantExpr::getInBoundsGetElementPtr(Ty, Val, IdxList));
}
+LLVMValueRef LLVMConstInBoundsGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices) {
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
+ Constant *Val = unwrap<Constant>(ConstantVal);
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(unwrap(Ty), Val, IdxList));
+}
+
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
@@ -3007,13 +3024,17 @@ LLVMTypeRef LLVMGetAllocatedType(LLVMValueRef Alloca) {
/*--.. Operations on gep instructions (only) ...............................--*/
LLVMBool LLVMIsInBounds(LLVMValueRef GEP) {
- return unwrap<GetElementPtrInst>(GEP)->isInBounds();
+ return unwrap<GEPOperator>(GEP)->isInBounds();
}
void LLVMSetIsInBounds(LLVMValueRef GEP, LLVMBool InBounds) {
return unwrap<GetElementPtrInst>(GEP)->setIsInBounds(InBounds);
}
+LLVMTypeRef LLVMGetGEPSourceElementType(LLVMValueRef GEP) {
+ return wrap(unwrap<GEPOperator>(GEP)->getSourceElementType());
+}
+
/*--.. Operations on phi nodes .............................................--*/
void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
@@ -3039,7 +3060,7 @@ LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
unsigned LLVMGetNumIndices(LLVMValueRef Inst) {
auto *I = unwrap(Inst);
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (auto *GEP = dyn_cast<GEPOperator>(I))
return GEP->getNumIndices();
if (auto *EV = dyn_cast<ExtractValueInst>(I))
return EV->getNumIndices();
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index 548962bd6a98..35af22034a12 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -671,11 +671,11 @@ DIBuilder::getOrCreateMacroArray(ArrayRef<Metadata *> Elements) {
DITypeRefArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) {
SmallVector<llvm::Metadata *, 16> Elts;
- for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
- if (Elements[i] && isa<MDNode>(Elements[i]))
- Elts.push_back(cast<DIType>(Elements[i]));
+ for (Metadata *E : Elements) {
+ if (isa_and_nonnull<MDNode>(E))
+ Elts.push_back(cast<DIType>(E));
else
- Elts.push_back(Elements[i]);
+ Elts.push_back(E);
}
return DITypeRefArray(MDNode::get(VMContext, Elts));
}
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 2ace18048262..61b2b13bfd03 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -124,26 +124,25 @@ LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
// PointerAlignElem, PointerAlign support
//===----------------------------------------------------------------------===//
-PointerAlignElem PointerAlignElem::get(uint32_t AddressSpace, Align ABIAlign,
- Align PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth) {
+PointerAlignElem PointerAlignElem::getInBits(uint32_t AddressSpace,
+ Align ABIAlign, Align PrefAlign,
+ uint32_t TypeBitWidth,
+ uint32_t IndexBitWidth) {
assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
PointerAlignElem retval;
retval.AddressSpace = AddressSpace;
retval.ABIAlign = ABIAlign;
retval.PrefAlign = PrefAlign;
- retval.TypeByteWidth = TypeByteWidth;
- retval.IndexWidth = IndexWidth;
+ retval.TypeBitWidth = TypeBitWidth;
+ retval.IndexBitWidth = IndexBitWidth;
return retval;
}
bool
PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
- return (ABIAlign == rhs.ABIAlign
- && AddressSpace == rhs.AddressSpace
- && PrefAlign == rhs.PrefAlign
- && TypeByteWidth == rhs.TypeByteWidth
- && IndexWidth == rhs.IndexWidth);
+ return (ABIAlign == rhs.ABIAlign && AddressSpace == rhs.AddressSpace &&
+ PrefAlign == rhs.PrefAlign && TypeBitWidth == rhs.TypeBitWidth &&
+ IndexBitWidth == rhs.IndexBitWidth);
}
//===----------------------------------------------------------------------===//
@@ -197,7 +196,7 @@ void DataLayout::reset(StringRef Desc) {
E.PrefAlign, E.TypeBitWidth))
return report_fatal_error(std::move(Err));
}
- if (Error Err = setPointerAlignment(0, Align(8), Align(8), 8, 8))
+ if (Error Err = setPointerAlignmentInBits(0, Align(8), Align(8), 64, 64))
return report_fatal_error(std::move(Err));
if (Error Err = parseSpecifier(Desc))
@@ -318,7 +317,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Error Err = ::split(Rest, ':', Split))
return Err;
unsigned PointerMemSize;
- if (Error Err = getIntInBytes(Tok, PointerMemSize))
+ if (Error Err = getInt(Tok, PointerMemSize))
return Err;
if (!PointerMemSize)
return reportError("Invalid pointer size of 0 bytes");
@@ -354,13 +353,13 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (!Rest.empty()) {
if (Error Err = ::split(Rest, ':', Split))
return Err;
- if (Error Err = getIntInBytes(Tok, IndexSize))
+ if (Error Err = getInt(Tok, IndexSize))
return Err;
if (!IndexSize)
return reportError("Invalid index size of 0 bytes");
}
}
- if (Error Err = setPointerAlignment(
+ if (Error Err = setPointerAlignmentInBits(
AddrSpace, assumeAligned(PointerABIAlign),
assumeAligned(PointerPrefAlign), PointerMemSize, IndexSize))
return Err;
@@ -603,9 +602,10 @@ DataLayout::getPointerAlignElem(uint32_t AddressSpace) const {
return Pointers[0];
}
-Error DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign,
- Align PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth) {
+Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
+ Align PrefAlign,
+ uint32_t TypeBitWidth,
+ uint32_t IndexBitWidth) {
if (PrefAlign < ABIAlign)
return reportError(
"Preferred alignment cannot be less than the ABI alignment");
@@ -615,13 +615,14 @@ Error DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign,
return A.AddressSpace < AddressSpace;
});
if (I == Pointers.end() || I->AddressSpace != AddrSpace) {
- Pointers.insert(I, PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign,
- TypeByteWidth, IndexWidth));
+ Pointers.insert(I,
+ PointerAlignElem::getInBits(AddrSpace, ABIAlign, PrefAlign,
+ TypeBitWidth, IndexBitWidth));
} else {
I->ABIAlign = ABIAlign;
I->PrefAlign = PrefAlign;
- I->TypeByteWidth = TypeByteWidth;
- I->IndexWidth = IndexWidth;
+ I->TypeBitWidth = TypeBitWidth;
+ I->IndexBitWidth = IndexBitWidth;
}
return Error::success();
}
@@ -704,13 +705,14 @@ Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
}
unsigned DataLayout::getPointerSize(unsigned AS) const {
- return getPointerAlignElem(AS).TypeByteWidth;
+ return divideCeil(getPointerAlignElem(AS).TypeBitWidth, 8);
}
unsigned DataLayout::getMaxIndexSize() const {
unsigned MaxIndexSize = 0;
for (auto &P : Pointers)
- MaxIndexSize = std::max(MaxIndexSize, P.IndexWidth);
+ MaxIndexSize =
+ std::max(MaxIndexSize, (unsigned)divideCeil(P.IndexBitWidth, 8));
return MaxIndexSize;
}
@@ -723,7 +725,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
}
unsigned DataLayout::getIndexSize(unsigned AS) const {
- return getPointerAlignElem(AS).IndexWidth;
+ return divideCeil(getPointerAlignElem(AS).IndexBitWidth, 8);
}
unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const {
@@ -901,16 +903,14 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
return Result;
}
-static void addElementIndex(SmallVectorImpl<APInt> &Indices, TypeSize ElemSize,
- APInt &Offset) {
+static APInt getElementIndex(TypeSize ElemSize, APInt &Offset) {
// Skip over scalable or zero size elements. Also skip element sizes larger
// than the positive index space, because the arithmetic below may not be
// correct in that case.
unsigned BitWidth = Offset.getBitWidth();
if (ElemSize.isScalable() || ElemSize == 0 ||
!isUIntN(BitWidth - 1, ElemSize)) {
- Indices.push_back(APInt::getZero(BitWidth));
- return;
+ return APInt::getZero(BitWidth);
}
APInt Index = Offset.sdiv(ElemSize);
@@ -921,47 +921,52 @@ static void addElementIndex(SmallVectorImpl<APInt> &Indices, TypeSize ElemSize,
Offset += ElemSize;
assert(Offset.isNonNegative() && "Remaining offset shouldn't be negative");
}
- Indices.push_back(Index);
+ return Index;
}
-SmallVector<APInt> DataLayout::getGEPIndicesForOffset(Type *&ElemTy,
- APInt &Offset) const {
- assert(ElemTy->isSized() && "Element type must be sized");
- SmallVector<APInt> Indices;
- addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset);
- while (Offset != 0) {
- if (auto *ArrTy = dyn_cast<ArrayType>(ElemTy)) {
- ElemTy = ArrTy->getElementType();
- addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset);
- continue;
- }
+Optional<APInt> DataLayout::getGEPIndexForOffset(Type *&ElemTy,
+ APInt &Offset) const {
+ if (auto *ArrTy = dyn_cast<ArrayType>(ElemTy)) {
+ ElemTy = ArrTy->getElementType();
+ return getElementIndex(getTypeAllocSize(ElemTy), Offset);
+ }
- if (auto *VecTy = dyn_cast<VectorType>(ElemTy)) {
- ElemTy = VecTy->getElementType();
- unsigned ElemSizeInBits = getTypeSizeInBits(ElemTy).getFixedSize();
- // GEPs over non-multiple of 8 size vector elements are invalid.
- if (ElemSizeInBits % 8 != 0)
- break;
+ if (auto *VecTy = dyn_cast<VectorType>(ElemTy)) {
+ ElemTy = VecTy->getElementType();
+ unsigned ElemSizeInBits = getTypeSizeInBits(ElemTy).getFixedSize();
+ // GEPs over non-multiple of 8 size vector elements are invalid.
+ if (ElemSizeInBits % 8 != 0)
+ return None;
- addElementIndex(Indices, TypeSize::Fixed(ElemSizeInBits / 8), Offset);
- continue;
- }
+ return getElementIndex(TypeSize::Fixed(ElemSizeInBits / 8), Offset);
+ }
- if (auto *STy = dyn_cast<StructType>(ElemTy)) {
- const StructLayout *SL = getStructLayout(STy);
- uint64_t IntOffset = Offset.getZExtValue();
- if (IntOffset >= SL->getSizeInBytes())
- break;
+ if (auto *STy = dyn_cast<StructType>(ElemTy)) {
+ const StructLayout *SL = getStructLayout(STy);
+ uint64_t IntOffset = Offset.getZExtValue();
+ if (IntOffset >= SL->getSizeInBytes())
+ return None;
- unsigned Index = SL->getElementContainingOffset(IntOffset);
- Offset -= SL->getElementOffset(Index);
- ElemTy = STy->getElementType(Index);
- Indices.push_back(APInt(32, Index));
- continue;
- }
+ unsigned Index = SL->getElementContainingOffset(IntOffset);
+ Offset -= SL->getElementOffset(Index);
+ ElemTy = STy->getElementType(Index);
+ return APInt(32, Index);
+ }
+
+ // Non-aggregate type.
+ return None;
+}
- // Can't index into non-aggregate type.
- break;
+SmallVector<APInt> DataLayout::getGEPIndicesForOffset(Type *&ElemTy,
+ APInt &Offset) const {
+ assert(ElemTy->isSized() && "Element type must be sized");
+ SmallVector<APInt> Indices;
+ Indices.push_back(getElementIndex(getTypeAllocSize(ElemTy), Offset));
+ while (Offset != 0) {
+ Optional<APInt> Index = getGEPIndexForOffset(ElemTy, Offset);
+ if (!Index)
+ break;
+ Indices.push_back(*Index);
}
return Indices;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 82b20a8af91b..f1a6402fb11b 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -980,7 +980,10 @@ enum IIT_Info {
IIT_STRUCT9 = 49,
IIT_V256 = 50,
IIT_AMX = 51,
- IIT_PPCF128 = 52
+ IIT_PPCF128 = 52,
+ IIT_V3 = 53,
+ IIT_EXTERNREF = 54,
+ IIT_FUNCREF = 55
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -1056,6 +1059,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::getVector(2, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
return;
+ case IIT_V3:
+ OutputTable.push_back(IITDescriptor::getVector(3, IsScalableVector));
+ DecodeIITType(NextElt, Infos, Info, OutputTable);
+ return;
case IIT_V4:
OutputTable.push_back(IITDescriptor::getVector(4, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
@@ -1092,6 +1099,14 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::getVector(1024, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
return;
+ case IIT_EXTERNREF:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 10));
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
+ return;
+ case IIT_FUNCREF:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 20));
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
+ return;
case IIT_PTR:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
DecodeIITType(NextElt, Infos, Info, OutputTable);
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 9f38288095e3..b6bd25aa1234 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -126,7 +126,7 @@ void GlobalObject::setAlignment(MaybeAlign Align) {
void GlobalObject::copyAttributesFrom(const GlobalObject *Src) {
GlobalValue::copyAttributesFrom(Src);
- setAlignment(MaybeAlign(Src->getAlignment()));
+ setAlignment(Src->getAlign());
setSection(Src->getSection());
}
@@ -249,7 +249,7 @@ bool GlobalObject::canIncreaseAlignment() const {
// alignment specified. (If it is assigned a section, the global
// could be densely packed with other objects in the section, and
// increasing the alignment could cause padding issues.)
- if (hasSection() && getAlignment() > 0)
+ if (hasSection() && getAlign().hasValue())
return false;
// On ELF platforms, we're further restricted in that we can't
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index 56932b457225..a0c48781ced5 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -262,12 +262,12 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
unsigned NumIndirect = 0;
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- switch (Constraints[i].Type) {
+ for (const ConstraintInfo &Constraint : Constraints) {
+ switch (Constraint.Type) {
case InlineAsm::isOutput:
if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
return false; // outputs before inputs and clobbers.
- if (!Constraints[i].isIndirect) {
+ if (!Constraint.isIndirect) {
++NumOutputs;
break;
}
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index a4659da7e807..4480ec799c35 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -166,7 +166,10 @@ void Instruction::dropPoisonGeneratingFlags() {
cast<GetElementPtrInst>(this)->setIsInBounds(false);
break;
}
- // TODO: FastMathFlags!
+ if (isa<FPMathOperator>(this)) {
+ setHasNoNaNs(false);
+ setHasNoInfs(false);
+ }
assert(!hasPoisonGeneratingFlags() && "must be kept in sync");
}
@@ -436,17 +439,17 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I1))
return AI->getAllocatedType() == cast<AllocaInst>(I2)->getAllocatedType() &&
- (AI->getAlignment() == cast<AllocaInst>(I2)->getAlignment() ||
+ (AI->getAlign() == cast<AllocaInst>(I2)->getAlign() ||
IgnoreAlignment);
if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
- (LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() ||
+ (LI->getAlign() == cast<LoadInst>(I2)->getAlign() ||
IgnoreAlignment) &&
LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
LI->getSyncScopeID() == cast<LoadInst>(I2)->getSyncScopeID();
if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
- (SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() ||
+ (SI->getAlign() == cast<StoreInst>(I2)->getAlign() ||
IgnoreAlignment) &&
SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
SI->getSyncScopeID() == cast<StoreInst>(I2)->getSyncScopeID();
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index ad27a6d8c08e..7798af3b19b9 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1410,8 +1410,6 @@ bool AllocaInst::isStaticAlloca() const {
void LoadInst::AssertOK() {
assert(getOperand(0)->getType()->isPointerTy() &&
"Ptr must have pointer type.");
- assert(!(isAtomic() && getAlignment() == 0) &&
- "Alignment required for atomic load");
}
static Align computeLoadStoreDefaultAlign(Type *Ty, BasicBlock *BB) {
@@ -1490,8 +1488,6 @@ void StoreInst::AssertOK() {
assert(cast<PointerType>(getOperand(1)->getType())
->isOpaqueOrPointeeTypeMatches(getOperand(0)->getType()) &&
"Ptr must be a pointer to Val type!");
- assert(!(isAtomic() && getAlignment() == 0) &&
- "Alignment required for atomic store");
}
StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
@@ -2328,7 +2324,6 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
}
Src1Elts.setBit(i);
Src1Identity &= (M == (i + NumSrcElts));
- continue;
}
assert((Src0Elts | Src1Elts | UndefElts).isAllOnes() &&
"unknown shuffle elements");
@@ -4165,6 +4160,47 @@ bool ICmpInst::compare(const APInt &LHS, const APInt &RHS,
};
}
+bool FCmpInst::compare(const APFloat &LHS, const APFloat &RHS,
+ FCmpInst::Predicate Pred) {
+ APFloat::cmpResult R = LHS.compare(RHS);
+ switch (Pred) {
+ default:
+ llvm_unreachable("Invalid FCmp Predicate");
+ case FCmpInst::FCMP_FALSE:
+ return false;
+ case FCmpInst::FCMP_TRUE:
+ return true;
+ case FCmpInst::FCMP_UNO:
+ return R == APFloat::cmpUnordered;
+ case FCmpInst::FCMP_ORD:
+ return R != APFloat::cmpUnordered;
+ case FCmpInst::FCMP_UEQ:
+ return R == APFloat::cmpUnordered || R == APFloat::cmpEqual;
+ case FCmpInst::FCMP_OEQ:
+ return R == APFloat::cmpEqual;
+ case FCmpInst::FCMP_UNE:
+ return R != APFloat::cmpEqual;
+ case FCmpInst::FCMP_ONE:
+ return R == APFloat::cmpLessThan || R == APFloat::cmpGreaterThan;
+ case FCmpInst::FCMP_ULT:
+ return R == APFloat::cmpUnordered || R == APFloat::cmpLessThan;
+ case FCmpInst::FCMP_OLT:
+ return R == APFloat::cmpLessThan;
+ case FCmpInst::FCMP_UGT:
+ return R == APFloat::cmpUnordered || R == APFloat::cmpGreaterThan;
+ case FCmpInst::FCMP_OGT:
+ return R == APFloat::cmpGreaterThan;
+ case FCmpInst::FCMP_ULE:
+ return R != APFloat::cmpGreaterThan;
+ case FCmpInst::FCMP_OLE:
+ return R == APFloat::cmpLessThan || R == APFloat::cmpEqual;
+ case FCmpInst::FCMP_UGE:
+ return R != APFloat::cmpLessThan;
+ case FCmpInst::FCMP_OGE:
+ return R == APFloat::cmpGreaterThan || R == APFloat::cmpEqual;
+ }
+}
+
CmpInst::Predicate CmpInst::getFlippedSignednessPredicate(Predicate pred) {
assert(CmpInst::isRelational(pred) &&
"Call only with non-equality predicates!");
@@ -4411,7 +4447,7 @@ void SwitchInstProfUpdateWrapper::addCase(
Weights.getValue()[SI.getNumSuccessors() - 1] = *W;
} else if (Weights) {
Changed = true;
- Weights.getValue().push_back(W ? *W : 0);
+ Weights.getValue().push_back(W.getValueOr(0));
}
if (Weights)
assert(SI.getNumSuccessors() == Weights->size() &&
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 9206cd37a6d1..8f7318665cfb 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -468,6 +468,7 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
}
Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
+ Type *ReturnType,
ArrayRef<Value *> Params) {
assert(isVPIntrinsic(VPID) && "not a VP intrinsic");
Function *VPFunc;
@@ -486,22 +487,15 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
break;
case Intrinsic::vp_load:
VPFunc = Intrinsic::getDeclaration(
- M, VPID,
- {Params[0]->getType()->getPointerElementType(), Params[0]->getType()});
+ M, VPID, {ReturnType, Params[0]->getType()});
break;
case Intrinsic::vp_gather:
VPFunc = Intrinsic::getDeclaration(
- M, VPID,
- {VectorType::get(cast<VectorType>(Params[0]->getType())
- ->getElementType()
- ->getPointerElementType(),
- cast<VectorType>(Params[0]->getType())),
- Params[0]->getType()});
+ M, VPID, {ReturnType, Params[0]->getType()});
break;
case Intrinsic::vp_store:
VPFunc = Intrinsic::getDeclaration(
- M, VPID,
- {Params[1]->getType()->getPointerElementType(), Params[1]->getType()});
+ M, VPID, {Params[0]->getType(), Params[1]->getType()});
break;
case Intrinsic::vp_scatter:
VPFunc = Intrinsic::getDeclaration(
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index b2909c425846..24c4a348f4da 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -386,8 +386,9 @@ template <> struct MDNodeKeyImpl<DIEnumerator> {
IsUnsigned(N->isUnsigned()) {}
bool isKeyOf(const DIEnumerator *RHS) const {
- return APInt::isSameValue(Value, RHS->getValue()) &&
- IsUnsigned == RHS->isUnsigned() && Name == RHS->getRawName();
+ return Value.getBitWidth() == RHS->getValue().getBitWidth() &&
+ Value == RHS->getValue() && IsUnsigned == RHS->isUnsigned() &&
+ Name == RHS->getRawName();
}
unsigned getHashValue() const { return hash_combine(Value, Name); }
@@ -1424,6 +1425,8 @@ public:
DenseMap<const GlobalValue *, DSOLocalEquivalent *> DSOLocalEquivalents;
+ DenseMap<const GlobalValue *, NoCFIValue *> NoCFIValues;
+
ConstantUniqueMap<ConstantExpr> ExprConstants;
ConstantUniqueMap<InlineAsm> InlineAsms;
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 7bccf09012ca..bb72bec93066 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -886,9 +886,8 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) {
// implements as well.
const PassInfo *PInf = TPM->findAnalysisPassInfo(PI);
if (!PInf) return;
- const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
- for (unsigned i = 0, e = II.size(); i != e; ++i)
- AvailableAnalysis[II[i]->getTypeInfo()] = P;
+ for (const PassInfo *PI : PInf->getInterfacesImplemented())
+ AvailableAnalysis[PI->getTypeInfo()] = P;
}
// Return true if P preserves high level analysis used by other
@@ -1013,10 +1012,9 @@ void PMDataManager::freePass(Pass *P, StringRef Msg,
// Remove all interfaces this pass implements, for which it is also
// listed as the available implementation.
- const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
- for (unsigned i = 0, e = II.size(); i != e; ++i) {
- DenseMap<AnalysisID, Pass*>::iterator Pos =
- AvailableAnalysis.find(II[i]->getTypeInfo());
+ for (const PassInfo *PI : PInf->getInterfacesImplemented()) {
+ DenseMap<AnalysisID, Pass *>::iterator Pos =
+ AvailableAnalysis.find(PI->getTypeInfo());
if (Pos != AvailableAnalysis.end() && Pos->second == P)
AvailableAnalysis.erase(Pos);
}
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 63ea41fba89a..a0485a59d0e0 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -750,8 +750,8 @@ void Module::setSDKVersion(const VersionTuple &V) {
ConstantDataArray::get(Context, Entries));
}
-VersionTuple Module::getSDKVersion() const {
- auto *CM = dyn_cast_or_null<ConstantAsMetadata>(getModuleFlag("SDK Version"));
+static VersionTuple getSDKVersionMD(Metadata *MD) {
+ auto *CM = dyn_cast_or_null<ConstantAsMetadata>(MD);
if (!CM)
return {};
auto *Arr = dyn_cast_or_null<ConstantDataArray>(CM->getValue());
@@ -775,6 +775,10 @@ VersionTuple Module::getSDKVersion() const {
return Result;
}
+VersionTuple Module::getSDKVersion() const {
+ return getSDKVersionMD(getModuleFlag("SDK Version"));
+}
+
GlobalVariable *llvm::collectUsedGlobalVariables(
const Module &M, SmallVectorImpl<GlobalValue *> &Vec, bool CompilerUsed) {
const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
@@ -809,3 +813,13 @@ void Module::setPartialSampleProfileRatio(const ModuleSummaryIndex &Index) {
}
}
}
+
+StringRef Module::getDarwinTargetVariantTriple() const {
+ if (const auto *MD = getModuleFlag("darwin.target_variant.triple"))
+ return cast<MDString>(MD)->getString();
+ return "";
+}
+
+VersionTuple Module::getDarwinTargetVariantSDKVersion() const {
+ return getSDKVersionMD(getModuleFlag("darwin.target_variant.SDK Version"));
+}
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index 31c5cd938d03..a0ac7d3ad7d3 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -447,11 +447,17 @@ static std::string linkageToString(GlobalValue::LinkageTypes LT) {
static std::string fflagsToString(FunctionSummary::FFlags F) {
auto FlagValue = [](unsigned V) { return V ? '1' : '0'; };
- char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
- FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
- FlagValue(F.NoInline), FlagValue(F.AlwaysInline),
- FlagValue(F.NoUnwind), FlagValue(F.MayThrow),
- FlagValue(F.HasUnknownCall), 0};
+ char FlagRep[] = {FlagValue(F.ReadNone),
+ FlagValue(F.ReadOnly),
+ FlagValue(F.NoRecurse),
+ FlagValue(F.ReturnDoesNotAlias),
+ FlagValue(F.NoInline),
+ FlagValue(F.AlwaysInline),
+ FlagValue(F.NoUnwind),
+ FlagValue(F.MayThrow),
+ FlagValue(F.HasUnknownCall),
+ FlagValue(F.MustBeUnreachable),
+ 0};
return FlagRep;
}
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index d15fcfbc5b9f..08c1fc931e2e 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -39,9 +39,10 @@ bool Operator::hasPoisonGeneratingFlags() const {
return GEP->isInBounds() || GEP->getInRangeIndex() != None;
}
default:
+ if (const auto *FP = dyn_cast<FPMathOperator>(this))
+ return FP->hasNoNaNs() || FP->hasNoInfs();
return false;
}
- // TODO: FastMathFlags! (On instructions, but not constexpr)
}
Type *GEPOperator::getSourceElementType() const {
@@ -89,7 +90,7 @@ bool GEPOperator::accumulateConstantOffset(
assert(Offset.getBitWidth() ==
DL.getIndexSizeInBits(getPointerAddressSpace()) &&
"The offset bit width does not match DL specification.");
- SmallVector<const Value *> Index(value_op_begin() + 1, value_op_end());
+ SmallVector<const Value *> Index(llvm::drop_begin(operand_values()));
return GEPOperator::accumulateConstantOffset(getSourceElementType(), Index,
DL, Offset, ExternalAnalysis);
}
diff --git a/llvm/lib/IR/SSAContext.cpp b/llvm/lib/IR/SSAContext.cpp
new file mode 100644
index 000000000000..a96e39f32882
--- /dev/null
+++ b/llvm/lib/IR/SSAContext.cpp
@@ -0,0 +1,47 @@
+//===- SSAContext.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a specialization of the GenericSSAContext<X>
+/// template class for LLVM IR.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/SSAContext.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+BasicBlock *SSAContext::getEntryBlock(Function &F) {
+ return &F.getEntryBlock();
+}
+
+void SSAContext::setFunction(Function &Fn) { F = &Fn; }
+
+Printable SSAContext::print(Value *V) const {
+ return Printable([V](raw_ostream &Out) { V->print(Out); });
+}
+
+Printable SSAContext::print(Instruction *Inst) const {
+ return print(cast<Value>(Inst));
+}
+
+Printable SSAContext::print(BasicBlock *BB) const {
+ if (BB->hasName())
+ return Printable([BB](raw_ostream &Out) { Out << BB->getName(); });
+
+ return Printable([BB](raw_ostream &Out) {
+ ModuleSlotTracker MST{BB->getParent()->getParent(), false};
+ MST.incorporateFunction(*BB->getParent());
+ Out << MST.getLocalSlot(BB);
+ });
+}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index b475c8327874..8741ed917f9f 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -928,7 +928,7 @@ Align Value::getPointerAlignment(const DataLayout &DL) const {
}
llvm_unreachable("Unhandled FunctionPtrAlignType");
}
- const MaybeAlign Alignment(GO->getAlignment());
+ const MaybeAlign Alignment(GO->getAlign());
if (!Alignment) {
if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
Type *ObjectType = GVar->getValueType();
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 154b59835b01..fb7c423e54e2 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -543,7 +543,7 @@ private:
void verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal);
void verifySwiftErrorValue(const Value *SwiftErrorVal);
- void verifyTailCCMustTailAttrs(AttrBuilder Attrs, StringRef Context);
+ void verifyTailCCMustTailAttrs(const AttrBuilder &Attrs, StringRef Context);
void verifyMustTailCall(CallInst &CI);
bool verifyAttributeCount(AttributeList Attrs, unsigned Params);
void verifyAttributeTypes(AttributeSet Attrs, const Value *V);
@@ -553,8 +553,6 @@ private:
void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
const Value *V, bool IsIntrinsic);
void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
- template <typename T>
- void verifyODRTypeAsScopeOperand(const MDNode &MD, T * = nullptr);
void visitConstantExprsRecursively(const Constant *EntryC);
void visitConstantExpr(const ConstantExpr *CE);
@@ -604,26 +602,35 @@ void Verifier::visit(Instruction &I) {
InstVisitor<Verifier>::visit(I);
}
-// Helper to recursively iterate over indirect users. By
-// returning false, the callback can ask to stop recursing
-// further.
+// Helper to iterate over indirect users. By returning false, the callback can ask to stop traversing further.
static void forEachUser(const Value *User,
SmallPtrSet<const Value *, 32> &Visited,
llvm::function_ref<bool(const Value *)> Callback) {
if (!Visited.insert(User).second)
return;
- for (const Value *TheNextUser : User->materialized_users())
- if (Callback(TheNextUser))
- forEachUser(TheNextUser, Visited, Callback);
+
+ SmallVector<const Value *> WorkList;
+ append_range(WorkList, User->materialized_users());
+ while (!WorkList.empty()) {
+ const Value *Cur = WorkList.pop_back_val();
+ if (!Visited.insert(Cur).second)
+ continue;
+ if (Callback(Cur))
+ append_range(WorkList, Cur->materialized_users());
+ }
}
void Verifier::visitGlobalValue(const GlobalValue &GV) {
Assert(!GV.isDeclaration() || GV.hasValidDeclarationLinkage(),
"Global is external, but doesn't have external or weak linkage!", &GV);
- if (const GlobalObject *GO = dyn_cast<GlobalObject>(&GV))
- Assert(GO->getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", GO);
+ if (const GlobalObject *GO = dyn_cast<GlobalObject>(&GV)) {
+
+ if (MaybeAlign A = GO->getAlign()) {
+ Assert(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", GO);
+ }
+ }
Assert(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
"Only global variables can have appending linkage!", &GV);
@@ -733,8 +740,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
Value *V = Op->stripPointerCasts();
Assert(isa<GlobalVariable>(V) || isa<Function>(V) ||
isa<GlobalAlias>(V),
- "invalid llvm.used member", V);
- Assert(V->hasName(), "members of llvm.used must be named", V);
+ Twine("invalid ") + GV.getName() + " member", V);
+ Assert(V->hasName(),
+ Twine("members of ") + GV.getName() + " must be named", V);
}
}
}
@@ -860,19 +868,6 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
}
}
-template <typename T>
-void Verifier::verifyODRTypeAsScopeOperand(const MDNode &MD, T *) {
- if (isa<T>(MD)) {
- if (auto *N = dyn_cast_or_null<DICompositeType>(cast<T>(MD).getScope()))
- // Of all the supported tags for DICompositeType(see visitDICompositeType)
- // we know that enum type cannot be a scope.
- AssertDI(N->getTag() != dwarf::DW_TAG_enumeration_type,
- "enum type is not a scope; check enum type ODR "
- "violation",
- N, &MD);
- }
-}
-
void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
// Only visit each node once. Metadata can be mutually recursive, so this
// avoids infinite recursion here, as well as being an optimization.
@@ -882,12 +877,6 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
Assert(&MD.getContext() == &Context,
"MDNode context does not match Module context!", &MD);
- // Makes sure when a scope operand is a ODR type, the ODR type uniquing does
- // not create invalid debug metadata.
- // TODO: check that the non-ODR-type scope operand is valid.
- verifyODRTypeAsScopeOperand<DIType>(MD);
- verifyODRTypeAsScopeOperand<DILocalScope>(MD);
-
switch (MD.getMetadataID()) {
default:
llvm_unreachable("Invalid MDNode subclass");
@@ -2055,10 +2044,12 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
}
if (Attrs.hasFnAttr(Attribute::VScaleRange)) {
- std::pair<unsigned, unsigned> Args =
- Attrs.getFnAttrs().getVScaleRangeArgs();
+ unsigned VScaleMin = Attrs.getFnAttrs().getVScaleRangeMin();
+ if (VScaleMin == 0)
+ CheckFailed("'vscale_range' minimum must be greater than 0", V);
- if (Args.first > Args.second && Args.second != 0)
+ Optional<unsigned> VScaleMax = Attrs.getFnAttrs().getVScaleRangeMax();
+ if (VScaleMax && VScaleMin > VScaleMax)
CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
}
@@ -3328,7 +3319,7 @@ void Verifier::visitCallBase(CallBase &Call) {
visitInstruction(Call);
}
-void Verifier::verifyTailCCMustTailAttrs(AttrBuilder Attrs,
+void Verifier::verifyTailCCMustTailAttrs(const AttrBuilder &Attrs,
StringRef Context) {
Assert(!Attrs.contains(Attribute::InAlloca),
Twine("inalloca attribute not allowed in ") + Context);
@@ -3733,15 +3724,15 @@ void Verifier::visitLoadInst(LoadInst &LI) {
PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
Assert(PTy, "Load operand must be a pointer.", &LI);
Type *ElTy = LI.getType();
- Assert(LI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &LI);
+ if (MaybeAlign A = LI.getAlign()) {
+ Assert(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &LI);
+ }
Assert(ElTy->isSized(), "loading unsized types is not allowed", &LI);
if (LI.isAtomic()) {
Assert(LI.getOrdering() != AtomicOrdering::Release &&
LI.getOrdering() != AtomicOrdering::AcquireRelease,
"Load cannot have Release ordering", &LI);
- Assert(LI.getAlignment() != 0,
- "Atomic load must specify explicit alignment", &LI);
Assert(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
"atomic load operand must have integer, pointer, or floating point "
"type!",
@@ -3761,15 +3752,15 @@ void Verifier::visitStoreInst(StoreInst &SI) {
Type *ElTy = SI.getOperand(0)->getType();
Assert(PTy->isOpaqueOrPointeeTypeMatches(ElTy),
"Stored value type does not match pointer operand type!", &SI, ElTy);
- Assert(SI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &SI);
+ if (MaybeAlign A = SI.getAlign()) {
+ Assert(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &SI);
+ }
Assert(ElTy->isSized(), "storing unsized types is not allowed", &SI);
if (SI.isAtomic()) {
Assert(SI.getOrdering() != AtomicOrdering::Acquire &&
SI.getOrdering() != AtomicOrdering::AcquireRelease,
"Store cannot have Acquire ordering", &SI);
- Assert(SI.getAlignment() != 0,
- "Atomic store must specify explicit alignment", &SI);
Assert(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
"atomic store operand must have integer, pointer, or floating point "
"type!",
@@ -3820,8 +3811,10 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
"Cannot allocate unsized type", &AI);
Assert(AI.getArraySize()->getType()->isIntegerTy(),
"Alloca array size must have integer type", &AI);
- Assert(AI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &AI);
+ if (MaybeAlign A = AI.getAlign()) {
+ Assert(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &AI);
+ }
if (AI.isSwiftError()) {
verifySwiftErrorValue(&AI);
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 6ce2ed265739..f26ef4b21996 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1106,7 +1106,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
if (Conf.PreOptModuleHook &&
!Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
- return Error::success();
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
if (!Conf.CodeGenOnly) {
for (const auto &R : GlobalResolutions) {
@@ -1132,7 +1132,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
if (Conf.PostInternalizeModuleHook &&
!Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
- return Error::success();
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
}
if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) {
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index be06556b0c3b..855d0fc8a8be 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
-#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -413,6 +412,8 @@ static void codegen(const Config &Conf, TargetMachine *TM,
if (Error Err = StreamOrErr.takeError())
report_fatal_error(std::move(Err));
std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
+ TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName;
+
legacy::PassManager CodeGenPasses;
CodeGenPasses.add(
createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex));
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 088e45c9e8dc..fdc9896aca78 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -135,9 +135,8 @@ LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context)
LTOCodeGenerator::~LTOCodeGenerator() {}
void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) {
- const std::vector<StringRef> &undefs = Mod->getAsmUndefinedRefs();
- for (int i = 0, e = undefs.size(); i != e; ++i)
- AsmUndefinedRefs.insert(undefs[i]);
+ for (const StringRef &Undef : Mod->getAsmUndefinedRefs())
+ AsmUndefinedRefs.insert(Undef);
}
bool LTOCodeGenerator::addModule(LTOModule *Mod) {
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 9474d8c9dafb..9aea27f0fdba 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -378,7 +378,8 @@ std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
// Run codegen now. resulting binary is in OutputBuffer.
PM.run(TheModule);
}
- return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
+ return std::make_unique<SmallVectorMemoryBuffer>(
+ std::move(OutputBuffer), /*RequiresNullTerminator=*/false);
}
/// Manage caching for a single Module.
@@ -541,7 +542,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI);
WriteBitcodeToFile(TheModule, OS, true, &Index);
}
- return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
+ return std::make_unique<SmallVectorMemoryBuffer>(
+ std::move(OutputBuffer), /*RequiresNullTerminator=*/false);
}
return codegenModule(TheModule, TM);
diff --git a/llvm/lib/LineEditor/LineEditor.cpp b/llvm/lib/LineEditor/LineEditor.cpp
index 1aa3476eb357..37c4b79f8e29 100644
--- a/llvm/lib/LineEditor/LineEditor.cpp
+++ b/llvm/lib/LineEditor/LineEditor.cpp
@@ -69,9 +69,8 @@ LineEditor::ListCompleterConcept::complete(StringRef Buffer, size_t Pos) const {
// common prefix will then be empty.
if (CommonPrefix.empty()) {
Action.Kind = CompletionAction::AK_ShowCompletions;
- for (std::vector<Completion>::iterator I = Comps.begin(), E = Comps.end();
- I != E; ++I)
- Action.Completions.push_back(I->DisplayText);
+ for (const Completion &Comp : Comps)
+ Action.Completions.push_back(Comp.DisplayText);
} else {
Action.Kind = CompletionAction::AK_Insert;
Action.Text = CommonPrefix;
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index bad483be197d..b475ea81d107 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -646,7 +646,7 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
/*init*/ nullptr, SGVar->getName(),
/*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
SGVar->getAddressSpace());
- NewDGV->setAlignment(MaybeAlign(SGVar->getAlignment()));
+ NewDGV->setAlignment(SGVar->getAlign());
NewDGV->copyAttributesFrom(SGVar);
return NewDGV;
}
@@ -877,7 +877,7 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
if (DstGV->isConstant() != SrcGV->isConstant())
return stringErr("Appending variables linked with different const'ness!");
- if (DstGV->getAlignment() != SrcGV->getAlignment())
+ if (DstGV->getAlign() != SrcGV->getAlign())
return stringErr(
"Appending variables with different alignment need to be linked!");
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 2ca921017171..5c2aaddff4d1 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -168,9 +168,14 @@ public:
unsigned Update, VersionTuple SDKVersion) override;
void emitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
unsigned Update, VersionTuple SDKVersion) override;
+ void emitDarwinTargetVariantBuildVersion(unsigned Platform, unsigned Major,
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) override;
void emitThumbFunc(MCSymbol *Func) override;
void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
+ void emitConditionalAssignment(MCSymbol *Symbol,
+ const MCExpr *Value) override;
void emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
@@ -640,6 +645,12 @@ void MCAsmStreamer::emitBuildVersion(unsigned Platform, unsigned Major,
EmitEOL();
}
+void MCAsmStreamer::emitDarwinTargetVariantBuildVersion(
+ unsigned Platform, unsigned Major, unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
+ emitBuildVersion(Platform, Major, Minor, Update, SDKVersion);
+}
+
void MCAsmStreamer::emitThumbFunc(MCSymbol *Func) {
// This needs to emit to a temporary string to get properly quoted
// MCSymbols when they have spaces in them.
@@ -670,6 +681,15 @@ void MCAsmStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
MCStreamer::emitAssignment(Symbol, Value);
}
+void MCAsmStreamer::emitConditionalAssignment(MCSymbol *Symbol,
+ const MCExpr *Value) {
+ OS << ".lto_set_conditional ";
+ Symbol->print(OS, MAI);
+ OS << ", ";
+ Value->print(OS, MAI);
+ EmitEOL();
+}
+
void MCAsmStreamer::emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
OS << ".weakref ";
Alias->print(OS, MAI);
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index d5e9f4fc66bc..a8837bbf57c7 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -89,6 +89,7 @@ MCAssembler::MCAssembler(MCContext &Context,
BundleAlignSize(0), RelaxAll(false), SubsectionsViaSymbols(false),
IncrementalLinkerCompatible(false), ELFHeaderEFlags(0) {
VersionInfo.Major = 0; // Major version == 0 for "none specified"
+ DarwinTargetVariantVersionInfo.Major = 0;
}
MCAssembler::~MCAssembler() = default;
@@ -109,6 +110,8 @@ void MCAssembler::reset() {
LOHContainer.reset();
VersionInfo.Major = 0;
VersionInfo.SDKVersion = VersionTuple();
+ DarwinTargetVariantVersionInfo.Major = 0;
+ DarwinTargetVariantVersionInfo.SDKVersion = VersionTuple();
// reset objects owned by us
if (getBackendPtr())
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index 52b59185c6fc..4ed1c6286a72 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -39,4 +39,4 @@ Optional<uint64_t>
MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
uint64_t Size) const {
return None;
-} \ No newline at end of file
+}
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index aa94b141d8be..3edf7a3f49e6 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -92,6 +92,9 @@ public:
unsigned Update, VersionTuple SDKVersion) override;
void emitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
unsigned Update, VersionTuple SDKVersion) override;
+ void emitDarwinTargetVariantBuildVersion(unsigned Platform, unsigned Major,
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) override;
void emitThumbFunc(MCSymbol *Func) override;
bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
@@ -283,6 +286,13 @@ void MCMachOStreamer::emitBuildVersion(unsigned Platform, unsigned Major,
Update, SDKVersion);
}
+void MCMachOStreamer::emitDarwinTargetVariantBuildVersion(
+ unsigned Platform, unsigned Major, unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
+ getAssembler().setDarwinTargetVariantBuildVersion(
+ (MachO::PlatformType)Platform, Major, Minor, Update, SDKVersion);
+}
+
void MCMachOStreamer::emitThumbFunc(MCSymbol *Symbol) {
// Remember that the function is a thumb function. Fixup and relocation
// values will need adjusted.
@@ -516,7 +526,10 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
new MCMachOStreamer(Context, std::move(MAB), std::move(OW), std::move(CE),
DWARFMustBeAtTheEnd, LabelSections);
const Triple &Target = Context.getTargetTriple();
- S->emitVersionForTarget(Target, Context.getObjectFileInfo()->getSDKVersion());
+ S->emitVersionForTarget(
+ Target, Context.getObjectFileInfo()->getSDKVersion(),
+ Context.getObjectFileInfo()->getDarwinTargetVariantTriple(),
+ Context.getObjectFileInfo()->getDarwinTargetVariantSDKVersion());
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
diff --git a/llvm/lib/MC/MCNullStreamer.cpp b/llvm/lib/MC/MCNullStreamer.cpp
index 291d840b4f4b..40b7eba58b03 100644
--- a/llvm/lib/MC/MCNullStreamer.cpp
+++ b/llvm/lib/MC/MCNullStreamer.cpp
@@ -40,6 +40,9 @@ namespace {
void EmitCOFFSymbolStorageClass(int StorageClass) override {}
void EmitCOFFSymbolType(int Type) override {}
void EndCOFFSymbolDef() override {}
+ void
+ emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, MCSymbolAttr Linkage,
+ MCSymbolAttr Visibility) override {}
};
}
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 9c86fcc86bcb..6604d7988c4c 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -281,6 +281,18 @@ void MCObjectStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
Symbol->setOffset(0);
addPendingLabel(Symbol);
}
+
+ emitPendingAssignments(Symbol);
+}
+
+void MCObjectStreamer::emitPendingAssignments(MCSymbol *Symbol) {
+ auto Assignments = pendingAssignments.find(Symbol);
+ if (Assignments != pendingAssignments.end()) {
+ for (const PendingAssignment &A : Assignments->second)
+ emitAssignment(A.Symbol, A.Value);
+
+ pendingAssignments.erase(Assignments);
+ }
}
// Emit a label at a previously emitted fragment/offset position. This must be
@@ -353,6 +365,19 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
void MCObjectStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
getAssembler().registerSymbol(*Symbol);
MCStreamer::emitAssignment(Symbol, Value);
+ emitPendingAssignments(Symbol);
+}
+
+void MCObjectStreamer::emitConditionalAssignment(MCSymbol *Symbol,
+ const MCExpr *Value) {
+ const MCSymbol *Target = &cast<MCSymbolRefExpr>(*Value).getSymbol();
+
+ // If the symbol already exists, emit the assignment. Otherwise, emit it
+ // later only if the symbol is also emitted.
+ if (Target->isRegistered())
+ emitAssignment(Symbol, Value);
+ else
+ pendingAssignments[Target].push_back({Symbol, Value});
}
bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index ed9f2066dc20..705f7159d55b 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -356,8 +356,14 @@ private:
/// return the contents from the current token up to the end or comma.
StringRef parseStringToComma();
- bool parseAssignment(StringRef Name, bool allow_redef,
- bool NoDeadStrip = false);
+ enum class AssignmentKind {
+ Set,
+ Equiv,
+ Equal,
+ LTOSetConditional,
+ };
+
+ bool parseAssignment(StringRef Name, AssignmentKind Kind);
unsigned getBinOpPrecedence(AsmToken::TokenKind K,
MCBinaryExpr::Opcode &Kind);
@@ -534,6 +540,7 @@ private:
DK_ADDRSIG_SYM,
DK_PSEUDO_PROBE,
DK_LTO_DISCARD,
+ DK_LTO_SET_CONDITIONAL,
DK_END
};
@@ -564,8 +571,8 @@ private:
const fltSemantics &); // ".single", ...
bool parseDirectiveFill(); // ".fill"
bool parseDirectiveZero(); // ".zero"
- // ".set", ".equ", ".equiv"
- bool parseDirectiveSet(StringRef IDVal, bool allow_redef);
+ // ".set", ".equ", ".equiv", ".lto_set_conditional"
+ bool parseDirectiveSet(StringRef IDVal, AssignmentKind Kind);
bool parseDirectiveOrg(); // ".org"
// ".align{,32}", ".p2align{,w,l}"
bool parseDirectiveAlign(bool IsPow2, unsigned ValueSize);
@@ -1968,7 +1975,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// identifier '=' ... -> assignment statement
Lex();
- return parseAssignment(IDVal, true);
+ return parseAssignment(IDVal, AssignmentKind::Equal);
default: // Normal instruction or directive.
break;
@@ -2027,9 +2034,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
break;
case DK_SET:
case DK_EQU:
- return parseDirectiveSet(IDVal, true);
+ return parseDirectiveSet(IDVal, AssignmentKind::Set);
case DK_EQUIV:
- return parseDirectiveSet(IDVal, false);
+ return parseDirectiveSet(IDVal, AssignmentKind::Equiv);
+ case DK_LTO_SET_CONDITIONAL:
+ return parseDirectiveSet(IDVal, AssignmentKind::LTOSetConditional);
case DK_ASCII:
return parseDirectiveAscii(IDVal, false);
case DK_ASCIZ:
@@ -2925,11 +2934,13 @@ void AsmParser::handleMacroExit() {
ActiveMacros.pop_back();
}
-bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
- bool NoDeadStrip) {
+bool AsmParser::parseAssignment(StringRef Name, AssignmentKind Kind) {
MCSymbol *Sym;
const MCExpr *Value;
- if (MCParserUtils::parseAssignmentExpression(Name, allow_redef, *this, Sym,
+ SMLoc ExprLoc = getTok().getLoc();
+ bool AllowRedef =
+ Kind == AssignmentKind::Set || Kind == AssignmentKind::Equal;
+ if (MCParserUtils::parseAssignmentExpression(Name, AllowRedef, *this, Sym,
Value))
return true;
@@ -2944,9 +2955,22 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
return false;
// Do the assignment.
- Out.emitAssignment(Sym, Value);
- if (NoDeadStrip)
+ switch (Kind) {
+ case AssignmentKind::Equal:
+ Out.emitAssignment(Sym, Value);
+ break;
+ case AssignmentKind::Set:
+ case AssignmentKind::Equiv:
+ Out.emitAssignment(Sym, Value);
Out.emitSymbolAttribute(Sym, MCSA_NoDeadStrip);
+ break;
+ case AssignmentKind::LTOSetConditional:
+ if (Value->getKind() != MCExpr::SymbolRef)
+ return Error(ExprLoc, "expected identifier");
+
+ Out.emitConditionalAssignment(Sym, Value);
+ break;
+ }
return false;
}
@@ -2998,10 +3022,11 @@ bool AsmParser::parseIdentifier(StringRef &Res) {
/// ::= .equ identifier ',' expression
/// ::= .equiv identifier ',' expression
/// ::= .set identifier ',' expression
-bool AsmParser::parseDirectiveSet(StringRef IDVal, bool allow_redef) {
+/// ::= .lto_set_conditional identifier ',' expression
+bool AsmParser::parseDirectiveSet(StringRef IDVal, AssignmentKind Kind) {
StringRef Name;
if (check(parseIdentifier(Name), "expected identifier") || parseComma() ||
- parseAssignment(Name, allow_redef, true))
+ parseAssignment(Name, Kind))
return true;
return false;
}
@@ -5581,6 +5606,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM;
DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE;
DirectiveKindMap[".lto_discard"] = DK_LTO_DISCARD;
+ DirectiveKindMap[".lto_set_conditional"] = DK_LTO_SET_CONDITIONAL;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@@ -6012,12 +6038,13 @@ bool AsmParser::parseMSInlineAsm(
bool isOutput = (i == 1) && Desc.mayStore();
SMLoc Start = SMLoc::getFromPointer(SymName.data());
+ int64_t Size = Operand.isMemPlaceholder(Desc) ? 0 : SymName.size();
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand.needAddressOf());
OutputConstraints.push_back(("=" + Constraint).str());
- AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
+ AsmStrRewrites.emplace_back(AOK_Output, Start, Size);
} else {
InputDecls.push_back(OpDecl);
InputDeclsAddressOf.push_back(Operand.needAddressOf());
@@ -6025,7 +6052,7 @@ bool AsmParser::parseMSInlineAsm(
if (Desc.OpInfo[i - 1].isBranchTarget())
AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
else
- AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
+ AsmStrRewrites.emplace_back(AOK_Input, Start, Size);
}
}
@@ -6140,13 +6167,17 @@ bool AsmParser::parseMSInlineAsm(
OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
break;
case AOK_Input:
- OS << '$' << InputIdx++;
+ if (AR.Len)
+ OS << '$' << InputIdx;
+ ++InputIdx;
break;
case AOK_CallInput:
OS << "${" << InputIdx++ << ":P}";
break;
case AOK_Output:
- OS << '$' << OutputIdx++;
+ if (AR.Len)
+ OS << '$' << OutputIdx;
+ ++OutputIdx;
break;
case AOK_SizeDirective:
switch (AR.Val) {
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index e35bcec8fe75..ebf38327f4dc 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -151,8 +151,8 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
// InlineSite is unique for each pair,
// so there will be no ordering of Inlinee based on MCPseudoProbeInlineTree*
std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
- for (auto Child = Children.begin(); Child != Children.end(); ++Child)
- Inlinees[Child->first] = Child->second.get();
+ for (auto &Child : Children)
+ Inlinees[Child.first] = Child.second.get();
for (const auto &Inlinee : Inlinees) {
if (Guid) {
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index f4e64b42c817..9c37a7bebe2a 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -431,6 +431,9 @@ void MCStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
TS->emitLabel(Symbol);
}
+void MCStreamer::emitConditionalAssignment(MCSymbol *Symbol,
+ const MCExpr *Value) {}
+
void MCStreamer::emitCFISections(bool EH, bool Debug) {}
void MCStreamer::emitCFIStartProc(bool IsSimple, SMLoc Loc) {
@@ -1308,45 +1311,78 @@ getMachoBuildVersionPlatformType(const Triple &Target) {
llvm_unreachable("unexpected OS type");
}
-void MCStreamer::emitVersionForTarget(const Triple &Target,
- const VersionTuple &SDKVersion) {
+void MCStreamer::emitVersionForTarget(
+ const Triple &Target, const VersionTuple &SDKVersion,
+ const Triple *DarwinTargetVariantTriple,
+ const VersionTuple &DarwinTargetVariantSDKVersion) {
if (!Target.isOSBinFormatMachO() || !Target.isOSDarwin())
return;
// Do we even know the version?
if (Target.getOSMajorVersion() == 0)
return;
- unsigned Major = 0;
- unsigned Minor = 0;
- unsigned Update = 0;
+ VersionTuple Version;
switch (Target.getOS()) {
case Triple::MacOSX:
case Triple::Darwin:
- Target.getMacOSXVersion(Major, Minor, Update);
+ Target.getMacOSXVersion(Version);
break;
case Triple::IOS:
case Triple::TvOS:
- Target.getiOSVersion(Major, Minor, Update);
+ Version = Target.getiOSVersion();
break;
case Triple::WatchOS:
- Target.getWatchOSVersion(Major, Minor, Update);
+ Version = Target.getWatchOSVersion();
break;
default:
llvm_unreachable("unexpected OS type");
}
- assert(Major != 0 && "A non-zero major version is expected");
- auto LinkedTargetVersion = targetVersionOrMinimumSupportedOSVersion(
- Target, VersionTuple(Major, Minor, Update));
+ assert(Version.getMajor() != 0 && "A non-zero major version is expected");
+ auto LinkedTargetVersion =
+ targetVersionOrMinimumSupportedOSVersion(Target, Version);
auto BuildVersionOSVersion = getMachoBuildVersionSupportedOS(Target);
+ bool ShouldEmitBuildVersion = false;
if (BuildVersionOSVersion.empty() ||
- LinkedTargetVersion >= BuildVersionOSVersion)
- return emitBuildVersion(getMachoBuildVersionPlatformType(Target),
- LinkedTargetVersion.getMajor(),
- *LinkedTargetVersion.getMinor(),
- *LinkedTargetVersion.getSubminor(), SDKVersion);
+ LinkedTargetVersion >= BuildVersionOSVersion) {
+ if (Target.isMacCatalystEnvironment() && DarwinTargetVariantTriple &&
+ DarwinTargetVariantTriple->isMacOSX()) {
+ emitVersionForTarget(*DarwinTargetVariantTriple,
+ DarwinTargetVariantSDKVersion,
+ /*TargetVariantTriple=*/nullptr,
+ /*TargetVariantSDKVersion=*/VersionTuple());
+ emitDarwinTargetVariantBuildVersion(
+ getMachoBuildVersionPlatformType(Target),
+ LinkedTargetVersion.getMajor(),
+ LinkedTargetVersion.getMinor().getValueOr(0),
+ LinkedTargetVersion.getSubminor().getValueOr(0), SDKVersion);
+ return;
+ }
+ emitBuildVersion(getMachoBuildVersionPlatformType(Target),
+ LinkedTargetVersion.getMajor(),
+ LinkedTargetVersion.getMinor().getValueOr(0),
+ LinkedTargetVersion.getSubminor().getValueOr(0),
+ SDKVersion);
+ ShouldEmitBuildVersion = true;
+ }
+
+ if (const Triple *TVT = DarwinTargetVariantTriple) {
+ if (Target.isMacOSX() && TVT->isMacCatalystEnvironment()) {
+ auto TVLinkedTargetVersion =
+ targetVersionOrMinimumSupportedOSVersion(*TVT, TVT->getiOSVersion());
+ emitDarwinTargetVariantBuildVersion(
+ getMachoBuildVersionPlatformType(*TVT),
+ TVLinkedTargetVersion.getMajor(),
+ TVLinkedTargetVersion.getMinor().getValueOr(0),
+ TVLinkedTargetVersion.getSubminor().getValueOr(0),
+ DarwinTargetVariantSDKVersion);
+ }
+ }
+
+ if (ShouldEmitBuildVersion)
+ return;
emitVersionMin(getMachoVersionMinLoadCommandType(Target),
LinkedTargetVersion.getMajor(),
- *LinkedTargetVersion.getMinor(),
- *LinkedTargetVersion.getSubminor(), SDKVersion);
+ LinkedTargetVersion.getMinor().getValueOr(0),
+ LinkedTargetVersion.getSubminor().getValueOr(0), SDKVersion);
}
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index 7773d8828931..2a93c352c68a 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -351,7 +351,7 @@ static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
// Unwind opcode encodings and restrictions are documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
- WinEH::Instruction &inst) {
+ const WinEH::Instruction &inst) {
uint8_t b, reg;
switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
default:
@@ -1050,10 +1050,8 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
// Emit epilog unwind instructions
for (auto &I : info->EpilogMap) {
auto &EpilogInstrs = I.second;
- for (uint32_t i = 0; i < EpilogInstrs.size(); i++) {
- WinEH::Instruction inst = EpilogInstrs[i];
+ for (const WinEH::Instruction &inst : EpilogInstrs)
ARM64EmitUnwindCode(streamer, info->Begin, inst);
- }
}
int32_t BytesMod = CodeWords * 4 - TotalCodeBytes;
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 277d88cf1cd2..16941b1cb727 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -484,15 +484,15 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Report errors for use of .indirect_symbol not in a symbol pointer section
// or stub section.
- for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
- ie = Asm.indirect_symbol_end(); it != ie; ++it) {
- const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
+ for (IndirectSymbolData &ISD : llvm::make_range(Asm.indirect_symbol_begin(),
+ Asm.indirect_symbol_end())) {
+ const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
Section.getType() != MachO::S_SYMBOL_STUBS) {
- MCSymbol &Symbol = *it->Symbol;
+ MCSymbol &Symbol = *ISD.Symbol;
report_fatal_error("indirect symbol '" + Symbol.getName() +
"' not in a symbol pointer or stub section");
}
@@ -779,6 +779,17 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
LoadCommandsSize += sizeof(MachO::version_min_command);
}
+ const MCAssembler::VersionInfoType &TargetVariantVersionInfo =
+ Layout.getAssembler().getDarwinTargetVariantVersionInfo();
+
+ // Add the target variant version info load command size, if used.
+ if (TargetVariantVersionInfo.Major != 0) {
+ ++NumLoadCommands;
+ assert(TargetVariantVersionInfo.EmitBuildVersion &&
+ "target variant should use build version");
+ LoadCommandsSize += sizeof(MachO::build_version_command);
+ }
+
// Add the data-in-code load command size, if used.
unsigned NumDataRegions = Asm.getDataRegions().size();
if (NumDataRegions) {
@@ -862,38 +873,43 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
}
// Write out the deployment target information, if it's available.
- if (VersionInfo.Major != 0) {
- auto EncodeVersion = [](VersionTuple V) -> uint32_t {
- assert(!V.empty() && "empty version");
- unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
- unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
- assert(Update < 256 && "unencodable update target version");
- assert(Minor < 256 && "unencodable minor target version");
- assert(V.getMajor() < 65536 && "unencodable major target version");
- return Update | (Minor << 8) | (V.getMajor() << 16);
- };
- uint32_t EncodedVersion = EncodeVersion(
- VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
- uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
- ? EncodeVersion(VersionInfo.SDKVersion)
- : 0;
- if (VersionInfo.EmitBuildVersion) {
- // FIXME: Currently empty tools. Add clang version in the future.
- W.write<uint32_t>(MachO::LC_BUILD_VERSION);
- W.write<uint32_t>(sizeof(MachO::build_version_command));
- W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
- W.write<uint32_t>(EncodedVersion);
- W.write<uint32_t>(SDKVersion);
- W.write<uint32_t>(0); // Empty tools list.
- } else {
- MachO::LoadCommandType LCType
- = getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
- W.write<uint32_t>(LCType);
- W.write<uint32_t>(sizeof(MachO::version_min_command));
- W.write<uint32_t>(EncodedVersion);
- W.write<uint32_t>(SDKVersion);
- }
- }
+ auto EmitDeploymentTargetVersion =
+ [&](const MCAssembler::VersionInfoType &VersionInfo) {
+ auto EncodeVersion = [](VersionTuple V) -> uint32_t {
+ assert(!V.empty() && "empty version");
+ unsigned Update = V.getSubminor().getValueOr(0);
+ unsigned Minor = V.getMinor().getValueOr(0);
+ assert(Update < 256 && "unencodable update target version");
+ assert(Minor < 256 && "unencodable minor target version");
+ assert(V.getMajor() < 65536 && "unencodable major target version");
+ return Update | (Minor << 8) | (V.getMajor() << 16);
+ };
+ uint32_t EncodedVersion = EncodeVersion(VersionTuple(
+ VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
+ uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
+ ? EncodeVersion(VersionInfo.SDKVersion)
+ : 0;
+ if (VersionInfo.EmitBuildVersion) {
+ // FIXME: Currently empty tools. Add clang version in the future.
+ W.write<uint32_t>(MachO::LC_BUILD_VERSION);
+ W.write<uint32_t>(sizeof(MachO::build_version_command));
+ W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
+ W.write<uint32_t>(EncodedVersion);
+ W.write<uint32_t>(SDKVersion);
+ W.write<uint32_t>(0); // Empty tools list.
+ } else {
+ MachO::LoadCommandType LCType =
+ getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
+ W.write<uint32_t>(LCType);
+ W.write<uint32_t>(sizeof(MachO::version_min_command));
+ W.write<uint32_t>(EncodedVersion);
+ W.write<uint32_t>(SDKVersion);
+ }
+ };
+ if (VersionInfo.Major != 0)
+ EmitDeploymentTargetVersion(VersionInfo);
+ if (TargetVariantVersionInfo.Major != 0)
+ EmitDeploymentTargetVersion(TargetVariantVersionInfo);
// Write the data-in-code load command, if used.
uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index 0948a6b9f1a1..09684b1e5ad2 100644
--- a/llvm/lib/MC/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -124,10 +124,10 @@ void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) {
array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
OS << " Registered Targets:\n";
- for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
- OS << " " << Targets[i].first;
- OS.indent(Width - Targets[i].first.size()) << " - "
- << Targets[i].second->getShortDescription() << '\n';
+ for (const auto &Target : Targets) {
+ OS << " " << Target.first;
+ OS.indent(Width - Target.first.size())
+ << " - " << Target.second->getShortDescription() << '\n';
}
if (Targets.empty())
OS << " (none)\n";
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index ce997464caa7..da8bcec7f3d4 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -696,7 +696,7 @@ writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab,
return std::move(E);
return std::make_unique<SmallVectorMemoryBuffer>(
- std::move(ArchiveBufferVector));
+ std::move(ArchiveBufferVector), /*RequiresNullTerminator=*/false);
}
} // namespace llvm
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 84181ae5e501..6e56da1a31f3 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -210,6 +210,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) {
return ELF::R_SPARC_RELATIVE;
case ELF::EM_CSKY:
return ELF::R_CKCORE_RELATIVE;
+ case ELF::EM_VE:
+ return ELF::R_VE_RELATIVE;
case ELF::EM_AMDGPU:
break;
case ELF::EM_BPF:
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 7501661591f0..42e257516f4e 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -26,12 +26,15 @@
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -4719,3 +4722,46 @@ StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const {
.Case("debug_str_offs", "debug_str_offsets")
.Default(Name);
}
+
+Expected<std::vector<std::string>>
+MachOObjectFile::findDsymObjectMembers(StringRef Path) {
+ SmallString<256> BundlePath(Path);
+ // Normalize input path. This is necessary to accept `bundle.dSYM/`.
+ sys::path::remove_dots(BundlePath);
+ if (!sys::fs::is_directory(BundlePath) ||
+ sys::path::extension(BundlePath) != ".dSYM")
+ return std::vector<std::string>();
+ sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
+ bool IsDir;
+ auto EC = sys::fs::is_directory(BundlePath, IsDir);
+ if (EC == errc::no_such_file_or_directory || (!EC && !IsDir))
+ return createStringError(
+ EC, "%s: expected directory 'Contents/Resources/DWARF' in dSYM bundle",
+ Path.str().c_str());
+ if (EC)
+ return createFileError(BundlePath, errorCodeToError(EC));
+
+ std::vector<std::string> ObjectPaths;
+ for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ StringRef ObjectPath = Dir->path();
+ sys::fs::file_status Status;
+ if (auto EC = sys::fs::status(ObjectPath, Status))
+ return createFileError(ObjectPath, errorCodeToError(EC));
+ switch (Status.type()) {
+ case sys::fs::file_type::regular_file:
+ case sys::fs::file_type::symlink_file:
+ case sys::fs::file_type::type_unknown:
+ ObjectPaths.push_back(ObjectPath.str());
+ break;
+ default: /*ignore*/;
+ }
+ }
+ if (EC)
+ return createFileError(BundlePath, errorCodeToError(EC));
+ if (ObjectPaths.empty())
+ return createStringError(std::error_code(),
+ "%s: no objects found in dSYM bundle",
+ Path.str().c_str());
+ return ObjectPaths;
+}
diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp
index 9673c97a10f0..ae1ff09a4f8f 100644
--- a/llvm/lib/Object/MachOUniversalWriter.cpp
+++ b/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -19,7 +19,6 @@
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
-#include "llvm/Support/SmallVectorMemoryBuffer.h"
using namespace llvm;
using namespace object;
diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp
index 66ad16db1ba4..d884e2fd55cd 100644
--- a/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -64,11 +64,7 @@ struct COFFParser {
}
bool parseSections() {
- for (std::vector<COFFYAML::Section>::iterator i = Obj.Sections.begin(),
- e = Obj.Sections.end();
- i != e; ++i) {
- COFFYAML::Section &Sec = *i;
-
+ for (COFFYAML::Section &Sec : Obj.Sections) {
// If the name is less than 8 bytes, store it in place, otherwise
// store it in the string table.
StringRef Name = Sec.Name;
@@ -103,11 +99,7 @@ struct COFFParser {
}
bool parseSymbols() {
- for (std::vector<COFFYAML::Symbol>::iterator i = Obj.Symbols.begin(),
- e = Obj.Symbols.end();
- i != e; ++i) {
- COFFYAML::Symbol &Sym = *i;
-
+ for (COFFYAML::Symbol &Sym : Obj.Symbols) {
// If the name is less than 8 bytes, store it in place, otherwise
// store it in the string table.
StringRef Name = Sym.Name;
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index e0dde4433d24..9b9266998ea6 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -464,29 +464,31 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_MIPS_ARCH_64R6, EF_MIPS_ARCH);
break;
case ELF::EM_HEXAGON:
- BCase(EF_HEXAGON_MACH_V2);
- BCase(EF_HEXAGON_MACH_V3);
- BCase(EF_HEXAGON_MACH_V4);
- BCase(EF_HEXAGON_MACH_V5);
- BCase(EF_HEXAGON_MACH_V55);
- BCase(EF_HEXAGON_MACH_V60);
- BCase(EF_HEXAGON_MACH_V62);
- BCase(EF_HEXAGON_MACH_V65);
- BCase(EF_HEXAGON_MACH_V66);
- BCase(EF_HEXAGON_MACH_V67);
- BCase(EF_HEXAGON_MACH_V67T);
- BCase(EF_HEXAGON_MACH_V68);
- BCase(EF_HEXAGON_ISA_V2);
- BCase(EF_HEXAGON_ISA_V3);
- BCase(EF_HEXAGON_ISA_V4);
- BCase(EF_HEXAGON_ISA_V5);
- BCase(EF_HEXAGON_ISA_V55);
- BCase(EF_HEXAGON_ISA_V60);
- BCase(EF_HEXAGON_ISA_V62);
- BCase(EF_HEXAGON_ISA_V65);
- BCase(EF_HEXAGON_ISA_V66);
- BCase(EF_HEXAGON_ISA_V67);
- BCase(EF_HEXAGON_ISA_V68);
+ BCaseMask(EF_HEXAGON_MACH_V2, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V3, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V4, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V5, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V55, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V60, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V62, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V65, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V66, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V67, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V67T, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V68, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_MACH_V69, EF_HEXAGON_MACH);
+ BCaseMask(EF_HEXAGON_ISA_V2, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V3, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V4, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V5, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V55, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V60, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V62, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V65, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V66, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V67, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V68, EF_HEXAGON_ISA);
+ BCaseMask(EF_HEXAGON_ISA_V69, EF_HEXAGON_ISA);
break;
case ELF::EM_AVR:
BCaseMask(EF_AVR_ARCH_AVR1, EF_AVR_ARCH_MASK);
diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
index 85d1f82bfafc..cf0d058c518c 100644
--- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
@@ -86,13 +86,13 @@ bool XCOFFWriter::nameShouldBeInStringTable(StringRef SymbolName) {
}
bool XCOFFWriter::initRelocations(uint64_t &CurrentOffset) {
- for (uint16_t I = 0, E = InitSections.size(); I < E; ++I) {
- if (!InitSections[I].Relocations.empty()) {
- InitSections[I].NumberOfRelocations = InitSections[I].Relocations.size();
- InitSections[I].FileOffsetToRelocations = CurrentOffset;
+ for (XCOFFYAML::Section &InitSection : InitSections) {
+ if (!InitSection.Relocations.empty()) {
+ InitSection.NumberOfRelocations = InitSection.Relocations.size();
+ InitSection.FileOffsetToRelocations = CurrentOffset;
uint64_t RelSize = Is64Bit ? XCOFF::RelocationSerializationSize64
: XCOFF::RelocationSerializationSize32;
- CurrentOffset += InitSections[I].NumberOfRelocations * RelSize;
+ CurrentOffset += InitSection.NumberOfRelocations * RelSize;
if (CurrentOffset > MaxRawDataSize) {
ErrHandler("maximum object size of" + Twine(MaxRawDataSize) +
"exceeded when writing relocation data");
diff --git a/llvm/lib/ObjectYAML/YAML.cpp b/llvm/lib/ObjectYAML/YAML.cpp
index 5dcb113d3395..54e8c627d5a1 100644
--- a/llvm/lib/ObjectYAML/YAML.cpp
+++ b/llvm/lib/ObjectYAML/YAML.cpp
@@ -30,9 +30,8 @@ StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
return "BinaryRef hex string must contain an even number of nybbles.";
// TODO: Can we improve YAMLIO to permit a more accurate diagnostic here?
// (e.g. a caret pointing to the offending character).
- for (unsigned I = 0, N = Scalar.size(); I != N; ++I)
- if (!llvm::isHexDigit(Scalar[I]))
- return "BinaryRef hex string must contain only hex digits.";
+ if (!llvm::all_of(Scalar, llvm::isHexDigit))
+ return "BinaryRef hex string must contain only hex digits.";
Val = yaml::BinaryRef(Scalar);
return {};
}
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 19e05b9272bb..c93b7ad7f5fa 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -591,16 +591,16 @@ static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
// Find the maximum option length.
unsigned OptionFieldWidth = 0;
- for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
+ for (const OptionInfo &Opt : OptionHelp) {
// Limit the amount of padding we are willing to give up for alignment.
- unsigned Length = OptionHelp[i].Name.size();
+ unsigned Length = Opt.Name.size();
if (Length <= 23)
OptionFieldWidth = std::max(OptionFieldWidth, Length);
}
const unsigned InitialPad = 2;
- for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
- const std::string &Option = OptionHelp[i].Name;
+ for (const OptionInfo &Opt : OptionHelp) {
+ const std::string &Option = Opt.Name;
int Pad = OptionFieldWidth - int(Option.size());
OS.indent(InitialPad) << Option;
@@ -609,7 +609,7 @@ static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
OS << "\n";
Pad = OptionFieldWidth + InitialPad;
}
- OS.indent(Pad + 1) << OptionHelp[i].HelpText << '\n';
+ OS.indent(Pad + 1) << Opt.HelpText << '\n';
}
}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 561a881bab0c..d7615ef4e9bf 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CostModel.h"
+#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/DDGPrinter.h"
#include "llvm/Analysis/Delinearization.h"
@@ -151,6 +152,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GuardWidening.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index de1b0ace7876..a6a36ff25402 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -178,6 +178,10 @@ static cl::opt<bool> EnableNoRerunSimplificationPipeline(
"than once in the case that SCC mutations cause a function to be "
"visited multiple times as long as the function has not been changed"));
+static cl::opt<bool> EnableMergeFunctions(
+ "enable-merge-functions", cl::init(false), cl::Hidden,
+ cl::desc("Enable function merging as part of the optimization pipeline"));
+
PipelineTuningOptions::PipelineTuningOptions() {
LoopInterleaving = true;
LoopVectorization = true;
@@ -187,7 +191,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
CallGraphProfile = true;
- MergeFunctions = false;
+ MergeFunctions = EnableMergeFunctions;
EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
}
@@ -418,9 +422,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
if (Level == OptimizationLevel::O3)
FPM.addPass(AggressiveInstCombinePass());
- FPM.addPass(InstCombinePass());
if (!Level.isOptimizingForSize())
FPM.addPass(LibCallsShrinkWrapPass());
@@ -754,9 +758,11 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
return MIWP;
}
-ModuleInlinerPass
+ModulePassManager
PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
+ ModulePassManager MPM;
+
InlineParams IP = getInlineParamsFromOptLevel(Level);
if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
PGOOpt->Action == PGOOptions::SampleUse)
@@ -773,7 +779,16 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
// inline deferral logic in module inliner.
IP.EnableDeferral = false;
- return ModuleInlinerPass(IP, UseInlineAdvisor);
+ MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor));
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ buildFunctionSimplificationPipeline(Level, Phase),
+ PTO.EagerlyInvalidateAnalyses));
+
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ CoroSplitPass(Level != OptimizationLevel::O0)));
+
+ return MPM;
}
ModulePassManager
@@ -980,26 +995,28 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
FPM.addPass(InstCombinePass());
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ ExtraVectorPassManager ExtraPasses;
// At higher optimization levels, try to clean up any runtime overlap and
// alignment checks inserted by the vectorizer. We want to track correlated
// runtime checks for two inner loops in the same outer loop, fold any
// common computations, hoist loop-invariant aspects out of any outer loop,
// and unswitch the runtime checks if possible. Once hoisted, we may have
// dead (or speculatable) control flows or more combining opportunities.
- FPM.addPass(EarlyCSEPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(InstCombinePass());
+ ExtraPasses.addPass(EarlyCSEPass());
+ ExtraPasses.addPass(CorrelatedValuePropagationPass());
+ ExtraPasses.addPass(InstCombinePass());
LoopPassManager LPM;
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
- FPM.addPass(
+ ExtraPasses.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(
+ ExtraPasses.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
+ ExtraPasses.addPass(SimplifyCFGPass());
+ ExtraPasses.addPass(InstCombinePass());
+ FPM.addPass(std::move(ExtraPasses));
}
// Now that we've formed fast to execute loop structures, we do further
@@ -1149,8 +1166,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// Disable header duplication at -Oz.
LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
// Some loops may have become dead by now. Try to delete them.
- // FIXME: see disscussion in https://reviews.llvm.org/D112851
- // this may need to be revisited once GVN is more powerful.
+ // FIXME: see discussion in https://reviews.llvm.org/D112851,
+ // this may need to be revisited once we run GVN before loop deletion
+ // in the simplification pipeline.
LPM.addPass(LoopDeletionPass());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
@@ -1167,23 +1185,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
- // Split out cold code. Splitting is done late to avoid hiding context from
- // other optimizations and inadvertently regressing performance. The tradeoff
- // is that this has a higher code size cost than splitting early.
- if (EnableHotColdSplit && !LTOPreLink)
- MPM.addPass(HotColdSplittingPass());
-
- // Search the code for similar regions of code. If enough similar regions can
- // be found where extracting the regions into their own function will decrease
- // the size of the program, we extract the regions, a deduplicate the
- // structurally similar regions.
- if (EnableIROutliner)
- MPM.addPass(IROutlinerPass());
-
- // Merge functions if requested.
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
@@ -1211,6 +1212,23 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
+ // Split out cold code. Splitting is done late to avoid hiding context from
+ // other optimizations and inadvertently regressing performance. The tradeoff
+ // is that this has a higher code size cost than splitting early.
+ if (EnableHotColdSplit && !LTOPreLink)
+ MPM.addPass(HotColdSplittingPass());
+
+ // Search the code for similar regions of code. If enough similar regions can
+ // be found where extracting the regions into their own function will decrease
+ // the size of the program, we extract the regions, and deduplicate the
+ // structurally similar regions.
+ if (EnableIROutliner)
+ MPM.addPass(IROutlinerPass());
+
+ // Merge functions if requested.
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
if (PTO.CallGraphProfile)
MPM.addPass(CGProfilePass());
@@ -1521,9 +1539,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
FunctionPassManager PeepholeFPM;
+ PeepholeFPM.addPass(InstCombinePass());
if (Level == OptimizationLevel::O3)
PeepholeFPM.addPass(AggressiveInstCombinePass());
- PeepholeFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(PeepholeFPM, Level);
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index c2032b5b8276..74613a7fcce0 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -185,6 +185,7 @@ FUNCTION_ANALYSIS("aa", AAManager())
FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis())
FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis())
+FUNCTION_ANALYSIS("cycles", CycleAnalysis())
FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis())
FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis())
@@ -202,6 +203,7 @@ FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis())
+FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses())
FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("targetir",
@@ -253,6 +255,7 @@ FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("dot-cfg", CFGPrinterPass())
FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
+FUNCTION_PASS("flattencfg", FlattenCFGPass())
FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("gvn-sink", GVNSinkPass())
@@ -303,6 +306,7 @@ FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs()))
+FUNCTION_PASS("print<cycles>", CycleInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 27a6c519ff82..23c825c78713 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -1262,11 +1262,6 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
namespace {
-enum IRChangeDiffType { InBefore, InAfter, IsCommon, NumIRChangeDiffTypes };
-
-// Describe where a given element exists.
-std::string Colours[NumIRChangeDiffTypes];
-
class DisplayNode;
class DotCfgDiffDisplayGraph;
@@ -1274,19 +1269,19 @@ class DotCfgDiffDisplayGraph;
class DisplayElement {
public:
// Is this in before, after, or both?
- IRChangeDiffType getType() const { return Type; }
+ StringRef getColour() const { return Colour; }
protected:
- DisplayElement(IRChangeDiffType T) : Type(T) {}
- const IRChangeDiffType Type;
+ DisplayElement(StringRef Colour) : Colour(Colour) {}
+ const StringRef Colour;
};
// An edge representing a transition between basic blocks in the
// dot-cfg-changes graph.
class DisplayEdge : public DisplayElement {
public:
- DisplayEdge(std::string V, DisplayNode &Node, IRChangeDiffType T)
- : DisplayElement(T), Value(V), Node(Node) {}
+ DisplayEdge(std::string Value, DisplayNode &Node, StringRef Colour)
+ : DisplayElement(Colour), Value(Value), Node(Node) {}
// The value on which the transition is made.
std::string getValue() const { return Value; }
// The node (representing a basic block) reached by this transition.
@@ -1302,8 +1297,8 @@ class DisplayNode : public DisplayElement {
public:
// \p C is the content for the node, \p T indicates the colour for the
// outline of the node
- DisplayNode(std::string C, IRChangeDiffType T)
- : DisplayElement(T), Content(C) {}
+ DisplayNode(std::string Content, StringRef Colour)
+ : DisplayElement(Colour), Content(Content) {}
// Iterator to the child nodes. Required by GraphWriter.
using ChildIterator = std::unordered_set<DisplayNode *>::const_iterator;
@@ -1315,13 +1310,13 @@ public:
EdgeIterator edges_begin() const { return EdgePtrs.cbegin(); }
EdgeIterator edges_end() const { return EdgePtrs.cend(); }
- // Create an edge to \p Node on value \p V, with type \p T.
- void createEdge(StringRef V, DisplayNode &Node, IRChangeDiffType T);
+ // Create an edge to \p Node on value \p Value, with colour \p Colour.
+ void createEdge(StringRef Value, DisplayNode &Node, StringRef Colour);
// Return the content of this node.
std::string getContent() const { return Content; }
- // Return the type of the edge to node \p S.
+ // Return the edge to node \p S.
const DisplayEdge &getEdge(const DisplayNode &To) const {
assert(EdgeMap.find(&To) != EdgeMap.end() && "Expected to find edge.");
return *EdgeMap.find(&To)->second;
@@ -1383,9 +1378,9 @@ public:
}
// Create a node.
- void createNode(std::string C, IRChangeDiffType T) {
+ void createNode(std::string C, StringRef Colour) {
assert(!NodeGenerationComplete && "Unexpected node creation");
- Nodes.emplace_back(C, T);
+ Nodes.emplace_back(C, Colour);
}
// Return the node at index \p N to avoid problems with vectors reallocating.
DisplayNode &getNode(unsigned N) {
@@ -1408,13 +1403,13 @@ public:
// Return a string with colour information for Dot. Required by GraphWriter.
std::string getNodeAttributes(const DisplayNode &Node) const {
- return attribute(Node.getType());
+ return attribute(Node.getColour());
}
// Return a string with colour information for Dot. Required by GraphWriter.
std::string getEdgeColorAttr(const DisplayNode &From,
const DisplayNode &To) const {
- return attribute(From.getEdge(To).getType());
+ return attribute(From.getEdge(To).getColour());
}
// Get the starting basic block. Required by GraphWriter.
@@ -1425,7 +1420,9 @@ public:
protected:
// Return the string containing the colour to use as a Dot attribute.
- std::string attribute(IRChangeDiffType T) const;
+ std::string attribute(StringRef Colour) const {
+ return "color=" + Colour.str();
+ }
bool NodeGenerationComplete = false;
const std::string GraphName;
@@ -1434,10 +1431,10 @@ protected:
DisplayNode *EntryNode = nullptr;
};
-void DisplayNode::createEdge(StringRef V, DisplayNode &Node,
- IRChangeDiffType T) {
+void DisplayNode::createEdge(StringRef Value, DisplayNode &Node,
+ StringRef Colour) {
assert(!AllEdgesCreated && "Expected to be able to still create edges.");
- Edges.emplace_back(V.str(), Node, T);
+ Edges.emplace_back(Value.str(), Node, Colour);
Children.insert(&Node);
}
@@ -1458,13 +1455,14 @@ public:
DotCfgDiffNode() = delete;
// Create a node in Dot difference graph \p G representing the basic block
- // represented by \p BD with type \p T (where it exists).
+ // represented by \p BD with colour \p Colour (where it exists).
DotCfgDiffNode(DotCfgDiff &G, unsigned N, const BlockDataT<DCData> &BD,
- IRChangeDiffType T)
- : Graph(G), N(N), Data{&BD, nullptr}, Type(T) {}
+ StringRef Colour)
+ : Graph(G), N(N), Data{&BD, nullptr}, Colour(Colour) {}
DotCfgDiffNode(const DotCfgDiffNode &DN)
- : Graph(DN.Graph), N(DN.N), Data{DN.Data[0], DN.Data[1]}, Type(DN.Type),
- EdgesMap(DN.EdgesMap), Children(DN.Children), Edges(DN.Edges) {}
+ : Graph(DN.Graph), N(DN.N), Data{DN.Data[0], DN.Data[1]},
+ Colour(DN.Colour), EdgesMap(DN.EdgesMap), Children(DN.Children),
+ Edges(DN.Edges) {}
unsigned getIndex() const { return N; }
@@ -1473,29 +1471,29 @@ public:
assert(Data[0] && "Expected Data[0] to be set.");
return Data[0]->getLabel();
}
- // Return where this block exists.
- IRChangeDiffType getType() const { return Type; }
+ // Return the colour for this block
+ StringRef getColour() const { return Colour; }
// Change this basic block from being only in before to being common.
// Save the pointer to \p Other.
void setCommon(const BlockDataT<DCData> &Other) {
assert(!Data[1] && "Expected only one block datum");
Data[1] = &Other;
- Type = IsCommon;
+ Colour = CommonColour;
}
- // Add an edge to \p E of type {\p Value, \p T}.
- void addEdge(unsigned E, StringRef Value, IRChangeDiffType T) {
+ // Add an edge to \p E of colour {\p Value, \p Colour}.
+ void addEdge(unsigned E, StringRef Value, StringRef Colour) {
// This is a new edge or it is an edge being made common.
- assert((EdgesMap.count(E) == 0 || T == IsCommon) &&
- "Unexpected edge count and type.");
- EdgesMap[E] = {Value.str(), T};
+ assert((EdgesMap.count(E) == 0 || Colour == CommonColour) &&
+ "Unexpected edge count and color.");
+ EdgesMap[E] = {Value.str(), Colour};
}
// Record the children and create edges.
void finalize(DotCfgDiff &G);
- // Return the type of the edge to node \p S.
- std::pair<std::string, IRChangeDiffType> getEdge(const unsigned S) const {
+ // Return the colour of the edge to node \p S.
+ StringRef getEdgeColour(const unsigned S) const {
assert(EdgesMap.count(S) == 1 && "Expected to find edge.");
- return EdgesMap.at(S);
+ return EdgesMap.at(S).second;
}
// Return the string representing the basic block.
@@ -1508,8 +1506,8 @@ protected:
DotCfgDiff &Graph;
const unsigned N;
const BlockDataT<DCData> *Data[2];
- IRChangeDiffType Type;
- std::map<const unsigned, std::pair<std::string, IRChangeDiffType>> EdgesMap;
+ StringRef Colour;
+ std::map<const unsigned, std::pair<std::string, StringRef>> EdgesMap;
std::vector<unsigned> Children;
std::vector<unsigned> Edges;
};
@@ -1552,12 +1550,11 @@ public:
protected:
// Return the string surrounded by HTML to make it the appropriate colour.
- std::string colourize(std::string S, IRChangeDiffType T) const;
+ std::string colourize(std::string S, StringRef Colour) const;
- void createNode(StringRef Label, const BlockDataT<DCData> &BD,
- IRChangeDiffType T) {
+ void createNode(StringRef Label, const BlockDataT<DCData> &BD, StringRef C) {
unsigned Pos = Nodes.size();
- Nodes.emplace_back(*this, Pos, BD, T);
+ Nodes.emplace_back(*this, Pos, BD, C);
NodePosition.insert({Label, Pos});
}
@@ -1572,7 +1569,7 @@ protected:
};
std::string DotCfgDiffNode::getBodyContent() const {
- if (Type == IsCommon) {
+ if (Colour == CommonColour) {
assert(Data[1] && "Expected Data[1] to be set.");
StringRef SR[2];
@@ -1586,11 +1583,11 @@ std::string DotCfgDiffNode::getBodyContent() const {
}
SmallString<80> OldLineFormat = formatv(
- "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[InBefore]);
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", BeforeColour);
SmallString<80> NewLineFormat = formatv(
- "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[InAfter]);
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", AfterColour);
SmallString<80> UnchangedLineFormat = formatv(
- "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[IsCommon]);
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", CommonColour);
std::string Diff = Data[0]->getLabel().str();
Diff += ":\n<BR align=\"left\"/>" +
doSystemDiff(makeHTMLReady(SR[0]), makeHTMLReady(SR[1]),
@@ -1625,7 +1622,7 @@ std::string DotCfgDiffNode::getBodyContent() const {
// drop predecessors as they can be big and are redundant
BS1 = BS1.drop_until([](char C) { return C == '\n'; }).drop_front();
- std::string S = "<FONT COLOR=\"" + Colours[Type] + "\">" + Label.str() + ":";
+ std::string S = "<FONT COLOR=\"" + Colour.str() + "\">" + Label.str() + ":";
// align each line to the left.
while (BS1.size()) {
@@ -1638,26 +1635,22 @@ std::string DotCfgDiffNode::getBodyContent() const {
return S;
}
-std::string DotCfgDiff::colourize(std::string S, IRChangeDiffType T) const {
+std::string DotCfgDiff::colourize(std::string S, StringRef Colour) const {
if (S.length() == 0)
return S;
- return "<FONT COLOR=\"" + Colours[T] + "\">" + S + "</FONT>";
-}
-
-std::string DotCfgDiffDisplayGraph::attribute(IRChangeDiffType T) const {
- return "color=" + Colours[T];
+ return "<FONT COLOR=\"" + Colour.str() + "\">" + S + "</FONT>";
}
DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
const FuncDataT<DCData> &After)
: GraphName(Title.str()) {
- StringMap<IRChangeDiffType> EdgesMap;
+ StringMap<StringRef> EdgesMap;
// Handle each basic block in the before IR.
for (auto &B : Before.getData()) {
StringRef Label = B.getKey();
const BlockDataT<DCData> &BD = B.getValue();
- createNode(Label, BD, InBefore);
+ createNode(Label, BD, BeforeColour);
// Create transitions with names made up of the from block label, the value
// on which the transition is made and the to block label.
@@ -1666,7 +1659,7 @@ DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
Sink != E; ++Sink) {
std::string Key = (Label + " " + Sink->getKey().str()).str() + " " +
BD.getData().getSuccessorLabel(Sink->getKey()).str();
- EdgesMap.insert({Key, InBefore});
+ EdgesMap.insert({Key, BeforeColour});
}
}
@@ -1677,7 +1670,7 @@ DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
unsigned C = NodePosition.count(Label);
if (C == 0)
// This only exists in the after IR. Create the node.
- createNode(Label, BD, InAfter);
+ createNode(Label, BD, AfterColour);
else {
assert(C == 1 && "Unexpected multiple nodes.");
Nodes[NodePosition[Label]].setCommon(BD);
@@ -1690,9 +1683,9 @@ DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
BD.getData().getSuccessorLabel(Sink->getKey()).str();
unsigned C = EdgesMap.count(Key);
if (C == 0)
- EdgesMap.insert({Key, InAfter});
+ EdgesMap.insert({Key, AfterColour});
else {
- EdgesMap[Key] = IsCommon;
+ EdgesMap[Key] = CommonColour;
}
}
}
@@ -1712,18 +1705,18 @@ DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
DotCfgDiffNode &SourceNode = Nodes[NodePosition[Source]];
assert(NodePosition.count(Sink) == 1 && "Expected to find node.");
unsigned SinkNode = NodePosition[Sink];
- IRChangeDiffType T = E.second;
+ StringRef Colour = E.second;
// Look for an edge from Source to Sink
if (EdgeLabels.count(SourceSink) == 0)
- EdgeLabels.insert({SourceSink, colourize(Value.str(), T)});
+ EdgeLabels.insert({SourceSink, colourize(Value.str(), Colour)});
else {
StringRef V = EdgeLabels.find(SourceSink)->getValue();
- std::string NV = colourize(V.str() + " " + Value.str(), T);
- T = IsCommon;
+ std::string NV = colourize(V.str() + " " + Value.str(), Colour);
+ Colour = CommonColour;
EdgeLabels[SourceSink] = NV;
}
- SourceNode.addEdge(SinkNode, Value, T);
+ SourceNode.addEdge(SinkNode, Value, Colour);
}
for (auto &I : Nodes)
I.finalize(*this);
@@ -1744,7 +1737,7 @@ DotCfgDiffDisplayGraph DotCfgDiff::createDisplayGraph(StringRef Title,
for (auto &I : Nodes) {
if (I.getIndex() == Entry)
EntryIndex = Index;
- G.createNode(I.getBodyContent(), I.getType());
+ G.createNode(I.getBodyContent(), I.getColour());
NodeMap.insert({I.getIndex(), Index++});
}
assert(EntryIndex >= 0 && "Expected entry node index to be set.");
@@ -1766,12 +1759,12 @@ void DotCfgDiffNode::createDisplayEdges(
for (auto I : Edges) {
unsigned SinkNodeIndex = I;
- IRChangeDiffType Type = getEdge(SinkNodeIndex).second;
+ StringRef Colour = getEdgeColour(SinkNodeIndex);
const DotCfgDiffNode *SinkNode = &Graph.getNode(SinkNodeIndex);
StringRef Label = Graph.getEdgeSourceLabel(getIndex(), SinkNodeIndex);
DisplayNode &SinkDisplayNode = DisplayGraph.getNode(SinkNode->getIndex());
- SourceDisplayNode.createEdge(Label, SinkDisplayNode, Type);
+ SourceDisplayNode.createEdge(Label, SinkDisplayNode, Colour);
}
SourceDisplayNode.createEdgeMap();
}
@@ -1891,12 +1884,7 @@ DCData::DCData(const BasicBlock &B) {
}
DotCfgChangeReporter::DotCfgChangeReporter(bool Verbose)
- : ChangeReporter<IRDataT<DCData>>(Verbose) {
- // Set up the colours based on the hidden options.
- Colours[InBefore] = BeforeColour;
- Colours[InAfter] = AfterColour;
- Colours[IsCommon] = CommonColour;
-}
+ : ChangeReporter<IRDataT<DCData>>(Verbose) {}
void DotCfgChangeReporter::handleFunctionCompare(
StringRef Name, StringRef Prefix, StringRef PassID, StringRef Divider,
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index ab3487ecffe8..34e0c5ebcd58 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -110,6 +110,18 @@ static std::string getInstrProfErrString(instrprof_error Err,
case instrprof_error::malformed:
OS << "malformed instrumentation profile data";
break;
+ case instrprof_error::missing_debug_info_for_correlation:
+ OS << "debug info for correlation is required";
+ break;
+ case instrprof_error::unexpected_debug_info_for_correlation:
+ OS << "debug info for correlation is not necessary";
+ break;
+ case instrprof_error::unable_to_correlate_profile:
+ OS << "unable to correlate profile";
+ break;
+ case instrprof_error::unsupported_debug_format:
+ OS << "unsupported debug info format (only DWARF is supported)";
+ break;
case instrprof_error::invalid_prof:
OS << "invalid profile created. Please file a bug "
"at: " BUG_REPORT_URL
@@ -533,8 +545,8 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const {
uint64_t FuncSum = 0;
Sum.NumEntries += Counts.size();
- for (size_t F = 0, E = Counts.size(); F < E; ++F)
- FuncSum += Counts[F];
+ for (uint64_t Count : Counts)
+ FuncSum += Count;
Sum.CountSum += FuncSum;
for (uint32_t VK = IPVK_First; VK <= IPVK_Last; ++VK) {
@@ -674,9 +686,9 @@ void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D,
function_ref<void(instrprof_error)> Warn) {
- for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
+ for (InstrProfValueData &I : ValueData) {
bool Overflowed;
- I->Count = SaturatingMultiply(I->Count, N, &Overflowed) / D;
+ I.Count = SaturatingMultiply(I.Count, N, &Overflowed) / D;
if (Overflowed)
Warn(instrprof_error::counter_overflow);
}
@@ -1175,7 +1187,8 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled) {
+ bool InstrEntryBBEnabled,
+ bool DebugInfoCorrelate) {
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
Type *IntTy64 = Type::getInt64Ty(M.getContext());
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
@@ -1183,6 +1196,8 @@ GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
if (InstrEntryBBEnabled)
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
+ if (DebugInfoCorrelate)
+ ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
new file mode 100644
index 000000000000..f9c113027da2
--- /dev/null
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -0,0 +1,264 @@
+//===-- InstrProfCorrelator.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/InstrProfCorrelator.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+#define DEBUG_TYPE "correlator"
+
+using namespace llvm;
+
+/// Get the __llvm_prf_cnts section.
+Expected<object::SectionRef> getCountersSection(const object::ObjectFile &Obj) {
+ for (auto &Section : Obj.sections())
+ if (auto SectionName = Section.getName())
+ if (SectionName.get() == INSTR_PROF_CNTS_SECT_NAME)
+ return Section;
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile);
+}
+
+const char *InstrProfCorrelator::FunctionNameAttributeName = "Function Name";
+const char *InstrProfCorrelator::CFGHashAttributeName = "CFG Hash";
+const char *InstrProfCorrelator::NumCountersAttributeName = "Num Counters";
+
+llvm::Expected<std::unique_ptr<InstrProfCorrelator::Context>>
+InstrProfCorrelator::Context::get(std::unique_ptr<MemoryBuffer> Buffer,
+ const object::ObjectFile &Obj) {
+ auto CountersSection = getCountersSection(Obj);
+ if (auto Err = CountersSection.takeError())
+ return std::move(Err);
+ auto C = std::make_unique<Context>();
+ C->Buffer = std::move(Buffer);
+ C->CountersSectionStart = CountersSection->getAddress();
+ C->CountersSectionEnd = C->CountersSectionStart + CountersSection->getSize();
+ C->ShouldSwapBytes = Obj.isLittleEndian() != sys::IsLittleEndianHost;
+ return Expected<std::unique_ptr<Context>>(std::move(C));
+}
+
+llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
+InstrProfCorrelator::get(StringRef DebugInfoFilename) {
+ auto DsymObjectsOrErr =
+ object::MachOObjectFile::findDsymObjectMembers(DebugInfoFilename);
+ if (auto Err = DsymObjectsOrErr.takeError())
+ return std::move(Err);
+ if (!DsymObjectsOrErr->empty()) {
+ // TODO: Enable profile correlation when there are multiple objects in a
+ // dSYM bundle.
+ if (DsymObjectsOrErr->size() > 1)
+ return createStringError(
+ std::error_code(),
+ "Profile correlation using multiple objects is not yet supported");
+ DebugInfoFilename = *DsymObjectsOrErr->begin();
+ }
+ auto BufferOrErr =
+ errorOrToExpected(MemoryBuffer::getFile(DebugInfoFilename));
+ if (auto Err = BufferOrErr.takeError())
+ return std::move(Err);
+
+ return get(std::move(*BufferOrErr));
+}
+
+llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
+InstrProfCorrelator::get(std::unique_ptr<MemoryBuffer> Buffer) {
+ auto BinOrErr = object::createBinary(*Buffer);
+ if (auto Err = BinOrErr.takeError())
+ return std::move(Err);
+
+ if (auto *Obj = dyn_cast<object::ObjectFile>(BinOrErr->get())) {
+ auto CtxOrErr = Context::get(std::move(Buffer), *Obj);
+ if (auto Err = CtxOrErr.takeError())
+ return std::move(Err);
+ auto T = Obj->makeTriple();
+ if (T.isArch64Bit())
+ return InstrProfCorrelatorImpl<uint64_t>::get(std::move(*CtxOrErr), *Obj);
+ if (T.isArch32Bit())
+ return InstrProfCorrelatorImpl<uint32_t>::get(std::move(*CtxOrErr), *Obj);
+ }
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile);
+}
+
+namespace llvm {
+
+template <>
+InstrProfCorrelatorImpl<uint32_t>::InstrProfCorrelatorImpl(
+ std::unique_ptr<InstrProfCorrelator::Context> Ctx)
+ : InstrProfCorrelatorImpl(InstrProfCorrelatorKind::CK_32Bit,
+ std::move(Ctx)) {}
+template <>
+InstrProfCorrelatorImpl<uint64_t>::InstrProfCorrelatorImpl(
+ std::unique_ptr<InstrProfCorrelator::Context> Ctx)
+ : InstrProfCorrelatorImpl(InstrProfCorrelatorKind::CK_64Bit,
+ std::move(Ctx)) {}
+template <>
+bool InstrProfCorrelatorImpl<uint32_t>::classof(const InstrProfCorrelator *C) {
+ return C->getKind() == InstrProfCorrelatorKind::CK_32Bit;
+}
+template <>
+bool InstrProfCorrelatorImpl<uint64_t>::classof(const InstrProfCorrelator *C) {
+ return C->getKind() == InstrProfCorrelatorKind::CK_64Bit;
+}
+
+} // end namespace llvm
+
+template <class IntPtrT>
+llvm::Expected<std::unique_ptr<InstrProfCorrelatorImpl<IntPtrT>>>
+InstrProfCorrelatorImpl<IntPtrT>::get(
+ std::unique_ptr<InstrProfCorrelator::Context> Ctx,
+ const object::ObjectFile &Obj) {
+ if (Obj.isELF() || Obj.isMachO()) {
+ auto DICtx = DWARFContext::create(Obj);
+ return std::make_unique<DwarfInstrProfCorrelator<IntPtrT>>(std::move(DICtx),
+ std::move(Ctx));
+ }
+ return make_error<InstrProfError>(instrprof_error::unsupported_debug_format);
+}
+
+template <class IntPtrT>
+Error InstrProfCorrelatorImpl<IntPtrT>::correlateProfileData() {
+ assert(Data.empty() && CompressedNames.empty() && Names.empty());
+ correlateProfileDataImpl();
+ auto Result =
+ collectPGOFuncNameStrings(Names, /*doCompression=*/true, CompressedNames);
+ Names.clear();
+ return Result;
+}
+
+template <class IntPtrT>
+void InstrProfCorrelatorImpl<IntPtrT>::addProbe(StringRef FunctionName,
+ uint64_t CFGHash,
+ IntPtrT CounterOffset,
+ IntPtrT FunctionPtr,
+ uint32_t NumCounters) {
+ Data.push_back({
+ maybeSwap<uint64_t>(IndexedInstrProf::ComputeHash(FunctionName)),
+ maybeSwap<uint64_t>(CFGHash),
+ // In this mode, CounterPtr actually stores the section relative address
+ // of the counter.
+ maybeSwap<IntPtrT>(CounterOffset),
+ maybeSwap<IntPtrT>(FunctionPtr),
+ // TODO: Value profiling is not yet supported.
+ /*ValuesPtr=*/maybeSwap<IntPtrT>(0),
+ maybeSwap<uint32_t>(NumCounters),
+ /*NumValueSites=*/{maybeSwap<uint16_t>(0), maybeSwap<uint16_t>(0)},
+ });
+ Names.push_back(FunctionName.str());
+}
+
+template <class IntPtrT>
+llvm::Optional<uint64_t>
+DwarfInstrProfCorrelator<IntPtrT>::getLocation(const DWARFDie &Die) const {
+ auto Locations = Die.getLocations(dwarf::DW_AT_location);
+ if (!Locations) {
+ consumeError(Locations.takeError());
+ return {};
+ }
+ auto &DU = *Die.getDwarfUnit();
+ for (auto &Location : *Locations) {
+ auto AddressSize = DU.getAddressByteSize();
+ DataExtractor Data(Location.Expr, DICtx->isLittleEndian(), AddressSize);
+ DWARFExpression Expr(Data, AddressSize);
+ for (auto &Op : Expr)
+ if (Op.getCode() == dwarf::DW_OP_addr)
+ return Op.getRawOperand(0);
+ }
+ return {};
+}
+
+template <class IntPtrT>
+bool DwarfInstrProfCorrelator<IntPtrT>::isDIEOfProbe(const DWARFDie &Die) {
+ const auto &ParentDie = Die.getParent();
+ if (!Die.isValid() || !ParentDie.isValid() || Die.isNULL())
+ return false;
+ if (Die.getTag() != dwarf::DW_TAG_variable)
+ return false;
+ if (!ParentDie.isSubprogramDIE())
+ return false;
+ if (!Die.hasChildren())
+ return false;
+ if (const char *Name = Die.getName(DINameKind::ShortName))
+ return StringRef(Name).startswith(getInstrProfCountersVarPrefix());
+ return false;
+}
+
+template <class IntPtrT>
+void DwarfInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl() {
+ auto maybeAddProbe = [&](DWARFDie Die) {
+ if (!isDIEOfProbe(Die))
+ return;
+ Optional<const char *> FunctionName;
+ Optional<uint64_t> CFGHash;
+ Optional<uint64_t> CounterPtr = getLocation(Die);
+ auto FunctionPtr =
+ dwarf::toAddress(Die.getParent().find(dwarf::DW_AT_low_pc));
+ Optional<uint64_t> NumCounters;
+ for (const DWARFDie &Child : Die.children()) {
+ if (Child.getTag() != dwarf::DW_TAG_LLVM_annotation)
+ continue;
+ auto AnnotationFormName = Child.find(dwarf::DW_AT_name);
+ auto AnnotationFormValue = Child.find(dwarf::DW_AT_const_value);
+ if (!AnnotationFormName || !AnnotationFormValue)
+ continue;
+ auto AnnotationNameOrErr = AnnotationFormName->getAsCString();
+ if (auto Err = AnnotationNameOrErr.takeError()) {
+ consumeError(std::move(Err));
+ continue;
+ }
+ StringRef AnnotationName = *AnnotationNameOrErr;
+ if (AnnotationName.compare(
+ InstrProfCorrelator::FunctionNameAttributeName) == 0) {
+ if (auto EC =
+ AnnotationFormValue->getAsCString().moveInto(FunctionName))
+ consumeError(std::move(EC));
+ } else if (AnnotationName.compare(
+ InstrProfCorrelator::CFGHashAttributeName) == 0) {
+ CFGHash = AnnotationFormValue->getAsUnsignedConstant();
+ } else if (AnnotationName.compare(
+ InstrProfCorrelator::NumCountersAttributeName) == 0) {
+ NumCounters = AnnotationFormValue->getAsUnsignedConstant();
+ }
+ }
+ if (!FunctionName || !CFGHash || !CounterPtr || !NumCounters) {
+ LLVM_DEBUG(dbgs() << "Incomplete DIE for probe\n\tFunctionName: "
+ << FunctionName << "\n\tCFGHash: " << CFGHash
+ << "\n\tCounterPtr: " << CounterPtr
+ << "\n\tNumCounters: " << NumCounters);
+ LLVM_DEBUG(Die.dump(dbgs()));
+ return;
+ }
+ uint64_t CountersStart = this->Ctx->CountersSectionStart;
+ uint64_t CountersEnd = this->Ctx->CountersSectionEnd;
+ if (*CounterPtr < CountersStart || *CounterPtr >= CountersEnd) {
+ LLVM_DEBUG(
+ dbgs() << "CounterPtr out of range for probe\n\tFunction Name: "
+ << FunctionName << "\n\tExpected: [0x"
+ << Twine::utohexstr(CountersStart) << ", 0x"
+ << Twine::utohexstr(CountersEnd) << ")\n\tActual: 0x"
+ << Twine::utohexstr(*CounterPtr));
+ LLVM_DEBUG(Die.dump(dbgs()));
+ return;
+ }
+ if (!FunctionPtr) {
+ LLVM_DEBUG(dbgs() << "Could not find address of " << *FunctionName
+ << "\n");
+ LLVM_DEBUG(Die.dump(dbgs()));
+ }
+ this->addProbe(*FunctionName, *CFGHash, *CounterPtr - CountersStart,
+ FunctionPtr.getValueOr(0), *NumCounters);
+ };
+ for (auto &CU : DICtx->normal_units())
+ for (const auto &Entry : CU->dies())
+ maybeAddProbe(DWARFDie(CU.get(), &Entry));
+ for (auto &CU : DICtx->dwo_units())
+ for (const auto &Entry : CU->dies())
+ maybeAddProbe(DWARFDie(CU.get(), &Entry));
+}
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 885c1fe49240..37cdf4dd1fe2 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -52,16 +52,19 @@ static Error initializeReader(InstrProfReader &Reader) {
}
Expected<std::unique_ptr<InstrProfReader>>
-InstrProfReader::create(const Twine &Path) {
+InstrProfReader::create(const Twine &Path,
+ const InstrProfCorrelator *Correlator) {
// Set up the buffer to read.
auto BufferOrError = setupMemoryBuffer(Path);
if (Error E = BufferOrError.takeError())
return std::move(E);
- return InstrProfReader::create(std::move(BufferOrError.get()));
+ return InstrProfReader::create(std::move(BufferOrError.get()), Correlator);
}
Expected<std::unique_ptr<InstrProfReader>>
-InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
+ const InstrProfCorrelator *Correlator) {
+ // Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
@@ -73,9 +76,9 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
if (IndexedInstrProfReader::hasFormat(*Buffer))
Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
else if (RawInstrProfReader64::hasFormat(*Buffer))
- Result.reset(new RawInstrProfReader64(std::move(Buffer)));
+ Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator));
else if (RawInstrProfReader32::hasFormat(*Buffer))
- Result.reset(new RawInstrProfReader32(std::move(Buffer)));
+ Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator));
else if (TextInstrProfReader::hasFormat(*Buffer))
Result.reset(new TextInstrProfReader(std::move(Buffer)));
else
@@ -352,7 +355,7 @@ Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
- if (Error E = Symtab.create(StringRef(NamesStart, NamesSize)))
+ if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
return error(std::move(E));
for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
const IntPtrT FPtr = swap(I->FunctionPointer);
@@ -369,6 +372,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
Version = swap(Header.Version);
if (GET_VERSION(Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);
+ if (useDebugInfoCorrelate() && !Correlator)
+ return error(instrprof_error::missing_debug_info_for_correlation);
+ if (!useDebugInfoCorrelate() && Correlator)
+ return error(instrprof_error::unexpected_debug_info_for_correlation);
BinaryIdsSize = swap(Header.BinaryIdsSize);
if (BinaryIdsSize % sizeof(uint64_t))
@@ -380,7 +387,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
auto CountersSize = swap(Header.CountersSize);
auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
- NamesSize = swap(Header.NamesSize);
+ auto NamesSize = swap(Header.NamesSize);
ValueKindLast = swap(Header.ValueKindLast);
auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
@@ -398,15 +405,27 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
return error(instrprof_error::bad_header);
- Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
- Start + DataOffset);
- DataEnd = Data + DataSize;
+ if (Correlator) {
+ // These sizes in the raw file are zero because we constructed them in the
+ // Correlator.
+ assert(DataSize == 0 && NamesSize == 0);
+ assert(CountersDelta == 0 && NamesDelta == 0);
+ Data = Correlator->getDataPointer();
+ DataEnd = Data + Correlator->getDataSize();
+ NamesStart = Correlator->getCompressedNamesPointer();
+ NamesEnd = NamesStart + Correlator->getCompressedNamesSize();
+ } else {
+ Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
+ Start + DataOffset);
+ DataEnd = Data + DataSize;
+ NamesStart = Start + NamesOffset;
+ NamesEnd = NamesStart + NamesSize;
+ }
// Binary ids start just after the header.
BinaryIdsStart =
reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
- NamesStart = Start + NamesOffset;
ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
@@ -440,45 +459,50 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
if (NumCounters == 0)
return error(instrprof_error::malformed, "number of counters is zero");
- IntPtrT CounterPtr = Data->CounterPtr;
- auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
- ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart;
-
- // Check bounds. Note that the counter pointer embedded in the data record
- // may itself be corrupt.
- if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters)
- return error(instrprof_error::malformed,
- "counter pointer is out of bounds");
-
- // We need to compute the in-buffer counter offset from the in-memory address
- // distance. The initial CountersDelta is the in-memory address difference
- // start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr -
- // CountersDelta computes the offset into the in-buffer counter section.
- //
- // CountersDelta decreases as we advance to the next data record.
- ptrdiff_t CounterOffset = getCounterOffset(CounterPtr);
- CountersDelta -= sizeof(*Data);
- if (CounterOffset < 0)
- return error(
- instrprof_error::malformed,
- ("counter offset " + Twine(CounterOffset) + " is negative").str());
+ ArrayRef<uint64_t> RawCounts;
+ if (Correlator) {
+ uint64_t CounterOffset = swap<IntPtrT>(Data->CounterPtr) / sizeof(uint64_t);
+ RawCounts =
+ makeArrayRef<uint64_t>(CountersStart + CounterOffset, NumCounters);
+ } else {
+ IntPtrT CounterPtr = Data->CounterPtr;
+ ptrdiff_t CounterOffset = getCounterOffset(CounterPtr);
+ if (CounterOffset < 0)
+ return error(
+ instrprof_error::malformed,
+ ("counter offset " + Twine(CounterOffset) + " is negative").str());
- if (CounterOffset > MaxNumCounters)
- return error(instrprof_error::malformed,
- ("counter offset " + Twine(CounterOffset) +
- " is greater than the maximum number of counters " +
- Twine((uint32_t)MaxNumCounters))
- .str());
+ // Check bounds. Note that the counter pointer embedded in the data record
+ // may itself be corrupt.
+ auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
+ ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart;
+ if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters)
+ return error(instrprof_error::malformed,
+ "counter pointer is out of bounds");
+ // We need to compute the in-buffer counter offset from the in-memory
+ // address distance. The initial CountersDelta is the in-memory address
+ // difference start(__llvm_prf_cnts)-start(__llvm_prf_data), so
+ // SrcData->CounterPtr - CountersDelta computes the offset into the
+ // in-buffer counter section.
+ if (CounterOffset > MaxNumCounters)
+ return error(instrprof_error::malformed,
+ ("counter offset " + Twine(CounterOffset) +
+ " is greater than the maximum number of counters " +
+ Twine((uint32_t)MaxNumCounters))
+ .str());
- if (((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
- return error(instrprof_error::malformed,
- ("number of counters " +
- Twine(((uint32_t)CounterOffset + NumCounters)) +
- " is greater than the maximum number of counters " +
- Twine((uint32_t)MaxNumCounters))
- .str());
+ if (((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
+ return error(instrprof_error::malformed,
+ ("number of counters " +
+ Twine(((uint32_t)CounterOffset + NumCounters)) +
+ " is greater than the maximum number of counters " +
+ Twine((uint32_t)MaxNumCounters))
+ .str());
+ // CountersDelta decreases as we advance to the next data record.
+ CountersDelta -= sizeof(*Data);
- auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters);
+ RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters);
+ }
if (ShouldSwapBytes) {
Record.Counts.clear();
@@ -977,11 +1001,10 @@ IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
if (Err)
return std::move(Err);
// Found it. Look for counters with the right hash.
- for (unsigned I = 0, E = Data.size(); I < E; ++I) {
+ for (const NamedInstrProfRecord &I : Data) {
// Check for a match and fill the vector if there is one.
- if (Data[I].Hash == FuncHash) {
- return std::move(Data[I]);
- }
+ if (I.Hash == FuncHash)
+ return std::move(I);
}
return error(instrprof_error::hash_mismatch);
}
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 492e3541cb5a..6628eea80640 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -32,6 +32,7 @@
#include <vector>
using namespace llvm;
+extern cl::opt<bool> DebugInfoCorrelate;
// A struct to define how the data stream should be patched. For Indexed
// profiling, only uint64_t data type is needed.
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index f54df7b295e3..bbb640cfaee8 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -194,7 +194,7 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
// more function profiles each with lower counts, which in turn leads to lower
// hot thresholds. To compensate for that, by default we merge context
// profiles before computing profile summary.
- if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
+ if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCSFlat &&
!UseContextLessSummary.getNumOccurrences())) {
for (const auto &I : Profiles) {
ContextLessProfiles[I.second.getName()].merge(I.second);
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index fd8fd3b675b7..9b01a386a360 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -35,11 +35,18 @@ static cl::opt<uint64_t> ProfileSymbolListCutOff(
cl::desc("Cutoff value about how many symbols in profile symbol list "
"will be used. This is very useful for performance debugging"));
+cl::opt<bool> GenerateMergedBaseProfiles(
+ "generate-merged-base-profiles", cl::init(true), cl::ZeroOrMore,
+ cl::desc("When generating nested context-sensitive profiles, always "
+ "generate extra base profile for function with all its context "
+ "profiles merged into it."));
+
namespace llvm {
namespace sampleprof {
SampleProfileFormat FunctionSamples::Format;
bool FunctionSamples::ProfileIsProbeBased = false;
-bool FunctionSamples::ProfileIsCS = false;
+bool FunctionSamples::ProfileIsCSFlat = false;
+bool FunctionSamples::ProfileIsCSNested = false;
bool FunctionSamples::UseMD5 = false;
bool FunctionSamples::HasUniqSuffix = true;
bool FunctionSamples::ProfileIsFS = false;
@@ -218,8 +225,9 @@ unsigned FunctionSamples::getOffset(const DILocation *DIL) {
0xffff;
}
-LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL) {
- if (FunctionSamples::ProfileIsProbeBased)
+LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL,
+ bool ProfileIsFS) {
+ if (FunctionSamples::ProfileIsProbeBased) {
// In a pseudo-probe based profile, a callsite is simply represented by the
// ID of the probe associated with the call instruction. The probe ID is
// encoded in the Discriminator field of the call instruction's debug
@@ -227,9 +235,19 @@ LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL) {
return LineLocation(PseudoProbeDwarfDiscriminator::extractProbeIndex(
DIL->getDiscriminator()),
0);
- else
- return LineLocation(FunctionSamples::getOffset(DIL),
- DIL->getBaseDiscriminator());
+ } else {
+ unsigned Discriminator =
+ ProfileIsFS ? DIL->getDiscriminator() : DIL->getBaseDiscriminator();
+ return LineLocation(FunctionSamples::getOffset(DIL), Discriminator);
+ }
+}
+
+uint64_t FunctionSamples::getCallSiteHash(StringRef CalleeName,
+ const LineLocation &Callsite) {
+ uint64_t NameHash = std::hash<std::string>{}(CalleeName.str());
+ uint64_t LocId =
+ (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator;
+ return NameHash + (LocId << 5) + LocId;
}
const FunctionSamples *FunctionSamples::findFunctionSamples(
@@ -239,21 +257,16 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
const DILocation *PrevDIL = DIL;
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
- unsigned Discriminator;
- if (ProfileIsFS)
- Discriminator = DIL->getDiscriminator();
- else
- Discriminator = DIL->getBaseDiscriminator();
-
// Use C++ linkage name if possible.
StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
if (Name.empty())
Name = PrevDIL->getScope()->getSubprogram()->getName();
-
- S.push_back(
- std::make_pair(LineLocation(getOffset(DIL), Discriminator), Name));
+ S.emplace_back(FunctionSamples::getCallSiteIdentifier(
+ DIL, FunctionSamples::ProfileIsFS),
+ Name);
PrevDIL = DIL;
}
+
if (S.size() == 0)
return this;
const FunctionSamples *FS = this;
@@ -454,3 +467,81 @@ void ProfileSymbolList::dump(raw_ostream &OS) const {
for (auto &Sym : SortedList)
OS << Sym << "\n";
}
+
+CSProfileConverter::FrameNode *
+CSProfileConverter::FrameNode::getOrCreateChildFrame(
+ const LineLocation &CallSite, StringRef CalleeName) {
+ uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
+ auto It = AllChildFrames.find(Hash);
+ if (It != AllChildFrames.end()) {
+ assert(It->second.FuncName == CalleeName &&
+ "Hash collision for child context node");
+ return &It->second;
+ }
+
+ AllChildFrames[Hash] = FrameNode(CalleeName, nullptr, CallSite);
+ return &AllChildFrames[Hash];
+}
+
+CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles)
+ : ProfileMap(Profiles) {
+ for (auto &FuncSample : Profiles) {
+ FunctionSamples *FSamples = &FuncSample.second;
+ auto *NewNode = getOrCreateContextPath(FSamples->getContext());
+ assert(!NewNode->FuncSamples && "New node cannot have sample profile");
+ NewNode->FuncSamples = FSamples;
+ }
+}
+
+CSProfileConverter::FrameNode *
+CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
+ auto Node = &RootFrame;
+ LineLocation CallSiteLoc(0, 0);
+ for (auto &Callsite : Context.getContextFrames()) {
+ Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.FuncName);
+ CallSiteLoc = Callsite.Location;
+ }
+ return Node;
+}
+
+void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
+ // Process each child profile. Add each child profile to callsite profile map
+ // of the current node `Node` if `Node` comes with a profile. Otherwise
+ // promote the child profile to a standalone profile.
+ auto *NodeProfile = Node.FuncSamples;
+ for (auto &It : Node.AllChildFrames) {
+ auto &ChildNode = It.second;
+ convertProfiles(ChildNode);
+ auto *ChildProfile = ChildNode.FuncSamples;
+ if (!ChildProfile)
+ continue;
+ SampleContext OrigChildContext = ChildProfile->getContext();
+ // Reset the child context to be contextless.
+ ChildProfile->getContext().setName(OrigChildContext.getName());
+ if (NodeProfile) {
+ // Add child profile to the callsite profile map.
+ auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
+ SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile);
+ NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
+ }
+
+ // Separate child profile to be a standalone profile, if the current parent
+ // profile doesn't exist. This is a duplicating operation when the child
+ // profile is already incorporated into the parent which is still useful and
+ // thus done optionally. It is seen that duplicating context profiles into
+ // base profiles improves the code quality for thinlto build by allowing a
+ // profile in the prelink phase for to-be-fully-inlined functions.
+ if (!NodeProfile || GenerateMergedBaseProfiles)
+ ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+
+ // Contexts coming with a `ContextShouldBeInlined` attribute indicate this
+ // is a preinliner-computed profile.
+ if (OrigChildContext.hasAttribute(ContextShouldBeInlined))
+ FunctionSamples::ProfileIsCSNested = true;
+
+ // Remove the original child profile.
+ ProfileMap.erase(OrigChildContext);
+ }
+}
+
+void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); }
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index eefb7c2ba627..da16309fb82c 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -146,7 +146,7 @@ static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
if (Depth == 0)
return false;
- if (Depth == 1 && Input[Depth] == '!') {
+ if (Input[Depth] == '!') {
LineTy = LineType::Metadata;
return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
}
@@ -244,11 +244,11 @@ std::error_code SampleProfileReaderText::readImpl() {
sampleprof_error Result = sampleprof_error::success;
InlineCallStack InlineStack;
- uint32_t ProbeProfileCount = 0;
+ uint32_t TopLevelProbeProfileCount = 0;
- // SeenMetadata tracks whether we have processed metadata for the current
- // top-level function profile.
- bool SeenMetadata = false;
+ // DepthMetadata tracks whether we have processed metadata for the current
+ // top-level or nested function profile.
+ uint32_t DepthMetadata = 0;
ProfileIsFS = ProfileIsFSDisciminator;
FunctionSamples::ProfileIsFS = ProfileIsFS;
@@ -275,7 +275,7 @@ std::error_code SampleProfileReaderText::readImpl() {
"Expected 'mangled_name:NUM:NUM', found " + *LineIt);
return sampleprof_error::malformed;
}
- SeenMetadata = false;
+ DepthMetadata = 0;
SampleContext FContext(FName, CSNameTable);
if (FContext.hasContext())
++CSProfileCount;
@@ -302,7 +302,7 @@ std::error_code SampleProfileReaderText::readImpl() {
*LineIt);
return sampleprof_error::malformed;
}
- if (SeenMetadata && LineTy != LineType::Metadata) {
+ if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
// Metadata must be put at the end of a function profile.
reportError(LineIt.line_number(),
"Found non-metadata after metadata: " + *LineIt);
@@ -322,6 +322,7 @@ std::error_code SampleProfileReaderText::readImpl() {
FSamples.setName(FName);
MergeResult(Result, FSamples.addTotalSamples(NumSamples));
InlineStack.push_back(&FSamples);
+ DepthMetadata = 0;
break;
}
case LineType::BodyProfile: {
@@ -342,11 +343,13 @@ std::error_code SampleProfileReaderText::readImpl() {
FunctionSamples &FProfile = *InlineStack.back();
if (FunctionHash) {
FProfile.setFunctionHash(FunctionHash);
- ++ProbeProfileCount;
+ if (Depth == 1)
+ ++TopLevelProbeProfileCount;
}
- if (Attributes)
- FProfile.getContext().setAllAttributes(Attributes);
- SeenMetadata = true;
+ FProfile.getContext().setAllAttributes(Attributes);
+ if (Attributes & (uint32_t)ContextShouldBeInlined)
+ ProfileIsCSNested = true;
+ DepthMetadata = Depth;
break;
}
}
@@ -355,12 +358,14 @@ std::error_code SampleProfileReaderText::readImpl() {
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile");
- ProfileIsCS = (CSProfileCount > 0);
- assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
+ ProfileIsCSFlat = (CSProfileCount > 0);
+ assert((TopLevelProbeProfileCount == 0 ||
+ TopLevelProbeProfileCount == Profiles.size()) &&
"Cannot have both probe-based profiles and regular profiles");
- ProfileIsProbeBased = (ProbeProfileCount > 0);
+ ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
- FunctionSamples::ProfileIsCS = ProfileIsCS;
+ FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat;
+ FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
if (Result == sampleprof_error::success)
computeSummary();
@@ -625,7 +630,7 @@ SampleProfileReaderExtBinaryBase::readContextFromTable() {
ErrorOr<SampleContext>
SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
- if (ProfileIsCS) {
+ if (ProfileIsCSFlat) {
auto FContext(readContextFromTable());
if (std::error_code EC = FContext.getError())
return EC;
@@ -649,7 +654,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
Summary->setPartialProfile(true);
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
- FunctionSamples::ProfileIsCS = ProfileIsCS = true;
+ FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat = true;
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
FunctionSamples::ProfileIsFS = ProfileIsFS = true;
break;
@@ -683,6 +688,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
ProfileIsProbeBased =
hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
+ ProfileIsCSNested =
+ hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsCSNested);
+ FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
bool HasAttribute =
hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
if (std::error_code EC = readFuncMetadata(HasAttribute))
@@ -770,7 +778,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
}
- if (ProfileIsCS) {
+ if (ProfileIsCSFlat) {
DenseSet<uint64_t> FuncGuidsToUse;
if (useMD5()) {
for (auto Name : FuncsToUse)
@@ -840,7 +848,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile");
- assert((!CSProfileCount || ProfileIsCS) &&
+ assert((!CSProfileCount || ProfileIsCSFlat) &&
"Section flag should be consistent with actual profile");
return sampleprof_error::success;
}
@@ -1078,30 +1086,77 @@ std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
}
std::error_code
-SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
- while (Data < End) {
- auto FContext(readSampleContextFromTable());
- if (std::error_code EC = FContext.getError())
- return EC;
- bool ProfileInMap = Profiles.count(*FContext);
+SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
+ FunctionSamples *FProfile) {
+ if (Data < End) {
if (ProfileIsProbeBased) {
auto Checksum = readNumber<uint64_t>();
if (std::error_code EC = Checksum.getError())
return EC;
- if (ProfileInMap)
- Profiles[*FContext].setFunctionHash(*Checksum);
+ if (FProfile)
+ FProfile->setFunctionHash(*Checksum);
}
if (ProfileHasAttribute) {
auto Attributes = readNumber<uint32_t>();
if (std::error_code EC = Attributes.getError())
return EC;
- if (ProfileInMap)
- Profiles[*FContext].getContext().setAllAttributes(*Attributes);
+ if (FProfile)
+ FProfile->getContext().setAllAttributes(*Attributes);
+ }
+
+ if (!ProfileIsCSFlat) {
+ // Read all the attributes for inlined function calls.
+ auto NumCallsites = readNumber<uint32_t>();
+ if (std::error_code EC = NumCallsites.getError())
+ return EC;
+
+ for (uint32_t J = 0; J < *NumCallsites; ++J) {
+ auto LineOffset = readNumber<uint64_t>();
+ if (std::error_code EC = LineOffset.getError())
+ return EC;
+
+ auto Discriminator = readNumber<uint64_t>();
+ if (std::error_code EC = Discriminator.getError())
+ return EC;
+
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
+ return EC;
+
+ FunctionSamples *CalleeProfile = nullptr;
+ if (FProfile) {
+ CalleeProfile = const_cast<FunctionSamples *>(
+ &FProfile->functionSamplesAt(LineLocation(
+ *LineOffset,
+ *Discriminator))[std::string(FContext.get().getName())]);
+ }
+ if (std::error_code EC =
+ readFuncMetadata(ProfileHasAttribute, CalleeProfile))
+ return EC;
+ }
}
}
+ return sampleprof_error::success;
+}
+
+std::error_code
+SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
+ while (Data < End) {
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
+ return EC;
+ FunctionSamples *FProfile = nullptr;
+ auto It = Profiles.find(*FContext);
+ if (It != Profiles.end())
+ FProfile = &It->second;
+
+ if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
+ return EC;
+ }
+
assert(Data == End && "More data is read than expected");
return sampleprof_error::success;
}
@@ -1233,6 +1288,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
Flags.append("probe,");
if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
Flags.append("attr,");
+ if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsCSNested))
+ Flags.append("preinlined,");
break;
default:
break;
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 78006aab1541..6f02bd203a9f 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -172,7 +172,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
return (std::error_code)sampleprof_error::success;
};
- if (FunctionSamples::ProfileIsCS) {
+ if (FunctionSamples::ProfileIsCSFlat) {
// Sort the contexts before writing them out. This is to help fast load all
// context profiles for a function as well as their callee contexts which
// can help profile-guided importing for ThinLTO.
@@ -195,17 +195,45 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
+ const FunctionSamples &FunctionProfile) {
+ auto &OS = *OutputStream;
+ if (std::error_code EC = writeContextIdx(FunctionProfile.getContext()))
+ return EC;
+
+ if (FunctionSamples::ProfileIsProbeBased)
+ encodeULEB128(FunctionProfile.getFunctionHash(), OS);
+ if (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested) {
+ encodeULEB128(FunctionProfile.getContext().getAllAttributes(), OS);
+ }
+
+ if (!FunctionSamples::ProfileIsCSFlat) {
+ // Recursively emit attributes for all callee samples.
+ uint64_t NumCallsites = 0;
+ for (const auto &J : FunctionProfile.getCallsiteSamples())
+ NumCallsites += J.second.size();
+ encodeULEB128(NumCallsites, OS);
+ for (const auto &J : FunctionProfile.getCallsiteSamples()) {
+ for (const auto &FS : J.second) {
+ LineLocation Loc = J.first;
+ encodeULEB128(Loc.LineOffset, OS);
+ encodeULEB128(Loc.Discriminator, OS);
+ if (std::error_code EC = writeFuncMetadata(FS.second))
+ return EC;
+ }
+ }
+ }
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
const SampleProfileMap &Profiles) {
- if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS)
+ if (!FunctionSamples::ProfileIsProbeBased &&
+ !FunctionSamples::ProfileIsCSFlat && !FunctionSamples::ProfileIsCSNested)
return sampleprof_error::success;
- auto &OS = *OutputStream;
for (const auto &Entry : Profiles) {
- if (std::error_code EC = writeContextIdx(Entry.second.getContext()))
+ if (std::error_code EC = writeFuncMetadata(Entry.second))
return EC;
- if (FunctionSamples::ProfileIsProbeBased)
- encodeULEB128(Entry.second.getFunctionHash(), OS);
- if (FunctionSamples::ProfileIsCS)
- encodeULEB128(Entry.second.getContext().getAllAttributes(), OS);
}
return sampleprof_error::success;
}
@@ -295,10 +323,13 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
setToCompressSection(SecProfileSymbolList);
if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
- if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
- addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
- if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCS)
+ if (Type == SecFuncMetadata && FunctionSamples::ProfileIsCSNested)
+ addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsCSNested);
+ if (Type == SecFuncMetadata &&
+ (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested))
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute);
+ if (Type == SecProfSummary && FunctionSamples::ProfileIsCSFlat)
+ addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
@@ -440,7 +471,7 @@ SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
/// it needs to be parsed by the SampleProfileReaderText class.
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
auto &OS = *OutputStream;
- if (FunctionSamples::ProfileIsCS)
+ if (FunctionSamples::ProfileIsCSFlat)
OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
else
OS << S.getName() << ":" << S.getTotalSamples();
@@ -483,15 +514,14 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
}
Indent -= 1;
- if (Indent == 0) {
- if (FunctionSamples::ProfileIsProbeBased) {
- OS.indent(Indent + 1);
- OS << "!CFGChecksum: " << S.getFunctionHash() << "\n";
- }
- if (FunctionSamples::ProfileIsCS) {
- OS.indent(Indent + 1);
- OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n";
- }
+ if (FunctionSamples::ProfileIsProbeBased) {
+ OS.indent(Indent + 1);
+ OS << "!CFGChecksum: " << S.getFunctionHash() << "\n";
+ }
+
+ if (S.getContext().getAllAttributes()) {
+ OS.indent(Indent + 1);
+ OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n";
}
return sampleprof_error::success;
@@ -841,7 +871,8 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
std::unique_ptr<SampleProfileWriter> Writer;
// Currently only Text and Extended Binary format are supported for CSSPGO.
- if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) &&
+ if ((FunctionSamples::ProfileIsCSFlat ||
+ FunctionSamples::ProfileIsProbeBased) &&
(Format == SPF_Binary || Format == SPF_Compact_Binary))
return sampleprof_error::unsupported_writing_format;
diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp
index a3e41ccd199c..4bc9c8487131 100644
--- a/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/llvm/lib/Support/AArch64TargetParser.cpp
@@ -240,4 +240,4 @@ AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) {
return C.ArchID;
}
return ArchKind::INVALID;
-} \ No newline at end of file
+}
diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp
index a2fe37a26617..8c685640f791 100644
--- a/llvm/lib/Support/Caching.cpp
+++ b/llvm/lib/Support/Caching.cpp
@@ -79,14 +79,13 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
struct CacheStream : CachedFileStream {
AddBufferFn AddBuffer;
sys::fs::TempFile TempFile;
- std::string EntryPath;
unsigned Task;
CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddBufferFn AddBuffer,
sys::fs::TempFile TempFile, std::string EntryPath,
unsigned Task)
- : CachedFileStream(std::move(OS)), AddBuffer(std::move(AddBuffer)),
- TempFile(std::move(TempFile)), EntryPath(std::move(EntryPath)),
+ : CachedFileStream(std::move(OS), std::move(EntryPath)),
+ AddBuffer(std::move(AddBuffer)), TempFile(std::move(TempFile)),
Task(Task) {}
~CacheStream() {
@@ -99,7 +98,7 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
// Open the file first to avoid racing with a cache pruner.
ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
MemoryBuffer::getOpenFile(
- sys::fs::convertFDToNativeFile(TempFile.FD), TempFile.TmpName,
+ sys::fs::convertFDToNativeFile(TempFile.FD), ObjectPathName,
/*FileSize=*/-1, /*RequiresNullTerminator=*/false);
if (!MBOrErr)
report_fatal_error(Twine("Failed to open new cache file ") +
@@ -115,14 +114,14 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
// AddBuffer a copy of the bytes we wrote in that case. We do this
// instead of just using the existing file, because the pruner might
// delete the file before we get a chance to use it.
- Error E = TempFile.keep(EntryPath);
+ Error E = TempFile.keep(ObjectPathName);
E = handleErrors(std::move(E), [&](const ECError &E) -> Error {
std::error_code EC = E.convertToErrorCode();
if (EC != errc::permission_denied)
return errorCodeToError(EC);
auto MBCopy = MemoryBuffer::getMemBufferCopy((*MBOrErr)->getBuffer(),
- EntryPath);
+ ObjectPathName);
MBOrErr = std::move(MBCopy);
// FIXME: should we consume the discard error?
@@ -133,7 +132,7 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
if (E)
report_fatal_error(Twine("Failed to rename temporary file ") +
- TempFile.TmpName + " to " + EntryPath + ": " +
+ TempFile.TmpName + " to " + ObjectPathName + ": " +
toString(std::move(E)) + "\n");
AddBuffer(Task, std::move(*MBOrErr));
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 5b7004c86f5a..4153a69abf5d 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1538,10 +1538,8 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
ErrorParsing = true;
} else {
- for (SmallVectorImpl<Option *>::iterator I = SinkOpts.begin(),
- E = SinkOpts.end();
- I != E; ++I)
- (*I)->addOccurrence(i, "", StringRef(argv[i]));
+ for (Option *SinkOpt : SinkOpts)
+ SinkOpt->addOccurrence(i, "", StringRef(argv[i]));
}
continue;
}
@@ -2303,11 +2301,8 @@ protected:
// Collect registered option categories into vector in preparation for
// sorting.
- for (auto I = GlobalParser->RegisteredOptionCategories.begin(),
- E = GlobalParser->RegisteredOptionCategories.end();
- I != E; ++I) {
- SortedCategories.push_back(*I);
- }
+ for (OptionCategory *Category : GlobalParser->RegisteredOptionCategories)
+ SortedCategories.push_back(Category);
// Sort the different option categories alphabetically.
assert(SortedCategories.size() > 0 && "No option categories registered!");
@@ -2315,11 +2310,8 @@ protected:
OptionCategoryCompare);
// Create map to empty vectors.
- for (std::vector<OptionCategory *>::const_iterator
- I = SortedCategories.begin(),
- E = SortedCategories.end();
- I != E; ++I)
- CategorizedOptions[*I] = std::vector<Option *>();
+ for (OptionCategory *Category : SortedCategories)
+ CategorizedOptions[Category] = std::vector<Option *>();
// Walk through pre-sorted options and assign into categories.
// Because the options are already alphabetically sorted the
@@ -2334,23 +2326,20 @@ protected:
}
// Now do printing.
- for (std::vector<OptionCategory *>::const_iterator
- Category = SortedCategories.begin(),
- E = SortedCategories.end();
- Category != E; ++Category) {
+ for (OptionCategory *Category : SortedCategories) {
// Hide empty categories for --help, but show for --help-hidden.
- const auto &CategoryOptions = CategorizedOptions[*Category];
+ const auto &CategoryOptions = CategorizedOptions[Category];
bool IsEmptyCategory = CategoryOptions.empty();
if (!ShowHidden && IsEmptyCategory)
continue;
// Print category information.
outs() << "\n";
- outs() << (*Category)->getName() << ":\n";
+ outs() << Category->getName() << ":\n";
// Check if description is set.
- if (!(*Category)->getDescription().empty())
- outs() << (*Category)->getDescription() << "\n\n";
+ if (!Category->getDescription().empty())
+ outs() << Category->getDescription() << "\n\n";
else
outs() << "\n";
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index b8c77cf69b95..ccf6ef4bb662 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -49,14 +49,14 @@ bool zlib::isAvailable() { return true; }
Error zlib::compress(StringRef InputBuffer,
SmallVectorImpl<char> &CompressedBuffer, int Level) {
unsigned long CompressedSize = ::compressBound(InputBuffer.size());
- CompressedBuffer.reserve(CompressedSize);
+ CompressedBuffer.resize_for_overwrite(CompressedSize);
int Res =
::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
(const Bytef *)InputBuffer.data(), InputBuffer.size(), Level);
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(CompressedBuffer.data(), CompressedSize);
- CompressedBuffer.set_size(CompressedSize);
+ CompressedBuffer.truncate(CompressedSize);
return Res ? createError(convertZlibCodeToString(Res)) : Error::success();
}
@@ -74,10 +74,10 @@ Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
Error zlib::uncompress(StringRef InputBuffer,
SmallVectorImpl<char> &UncompressedBuffer,
size_t UncompressedSize) {
- UncompressedBuffer.reserve(UncompressedSize);
+ UncompressedBuffer.resize_for_overwrite(UncompressedSize);
Error E =
uncompress(InputBuffer, UncompressedBuffer.data(), UncompressedSize);
- UncompressedBuffer.set_size(UncompressedSize);
+ UncompressedBuffer.truncate(UncompressedSize);
return E;
}
diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp
index d8d46712a593..392c4c4890e1 100644
--- a/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -103,8 +103,8 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
std::vector<UTF16> ByteSwapped;
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
- for (unsigned I = 0, E = ByteSwapped.size(); I != E; ++I)
- ByteSwapped[I] = llvm::ByteSwap_16(ByteSwapped[I]);
+ for (UTF16 &I : ByteSwapped)
+ I = llvm::ByteSwap_16(I);
Src = &ByteSwapped[0];
SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
}
diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
index e5e6301d41cc..a6daee00bd43 100644
--- a/llvm/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -180,22 +180,19 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
DAGDeltaAlgorithm &DDA, const changeset_ty &Changes,
const std::vector<edge_ty> &Dependencies)
: DDA(DDA) {
- for (changeset_ty::const_iterator it = Changes.begin(),
- ie = Changes.end(); it != ie; ++it) {
- Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
- Successors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ for (change_ty Change : Changes) {
+ Predecessors.insert(std::make_pair(Change, std::vector<change_ty>()));
+ Successors.insert(std::make_pair(Change, std::vector<change_ty>()));
}
- for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(),
- ie = Dependencies.end(); it != ie; ++it) {
- Predecessors[it->second].push_back(it->first);
- Successors[it->first].push_back(it->second);
+ for (const edge_ty &Dep : Dependencies) {
+ Predecessors[Dep.second].push_back(Dep.first);
+ Successors[Dep.first].push_back(Dep.second);
}
// Compute the roots.
- for (changeset_ty::const_iterator it = Changes.begin(),
- ie = Changes.end(); it != ie; ++it)
- if (succ_begin(*it) == succ_end(*it))
- Roots.push_back(*it);
+ for (change_ty Change : Changes)
+ if (succ_begin(Change) == succ_end(Change))
+ Roots.push_back(Change);
// Pre-compute the closure of the successor relation.
std::vector<change_ty> Worklist(Roots.begin(), Roots.end());
@@ -213,14 +210,13 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
}
// Invert to form the predecessor closure map.
- for (changeset_ty::const_iterator it = Changes.begin(),
- ie = Changes.end(); it != ie; ++it)
- PredClosure.insert(std::make_pair(*it, std::set<change_ty>()));
- for (changeset_ty::const_iterator it = Changes.begin(),
- ie = Changes.end(); it != ie; ++it)
- for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
- ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
- PredClosure[*it2].insert(*it);
+ for (change_ty Change : Changes)
+ PredClosure.insert(std::make_pair(Change, std::set<change_ty>()));
+ for (change_ty Change : Changes)
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(Change),
+ ie2 = succ_closure_end(Change);
+ it2 != ie2; ++it2)
+ PredClosure[*it2].insert(Change);
// Dump useful debug info.
LLVM_DEBUG({
@@ -256,13 +252,12 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
llvm::errs() << "]\n";
llvm::errs() << "Predecessor Closure:\n";
- for (changeset_ty::const_iterator it = Changes.begin(), ie = Changes.end();
- it != ie; ++it) {
- llvm::errs() << format(" %-4d: [", *it);
- for (pred_closure_iterator_ty it2 = pred_closure_begin(*it),
- ie2 = pred_closure_end(*it);
+ for (change_ty Change : Changes) {
+ llvm::errs() << format(" %-4d: [", Change);
+ for (pred_closure_iterator_ty it2 = pred_closure_begin(Change),
+ ie2 = pred_closure_end(Change);
it2 != ie2; ++it2) {
- if (it2 != pred_closure_begin(*it))
+ if (it2 != pred_closure_begin(Change))
llvm::errs() << ", ";
llvm::errs() << *it2;
}
@@ -270,13 +265,12 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
}
llvm::errs() << "Successor Closure:\n";
- for (changeset_ty::const_iterator it = Changes.begin(), ie = Changes.end();
- it != ie; ++it) {
- llvm::errs() << format(" %-4d: [", *it);
- for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
- ie2 = succ_closure_end(*it);
+ for (change_ty Change : Changes) {
+ llvm::errs() << format(" %-4d: [", Change);
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(Change),
+ ie2 = succ_closure_end(Change);
it2 != ie2; ++it2) {
- if (it2 != succ_closure_begin(*it))
+ if (it2 != succ_closure_begin(Change))
llvm::errs() << ", ";
llvm::errs() << *it2;
}
@@ -291,9 +285,8 @@ bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes,
const changeset_ty &Required) {
changeset_ty Extended(Required);
Extended.insert(Changes.begin(), Changes.end());
- for (changeset_ty::const_iterator it = Changes.begin(),
- ie = Changes.end(); it != ie; ++it)
- Extended.insert(pred_closure_begin(*it), pred_closure_end(*it));
+ for (change_ty Change : Changes)
+ Extended.insert(pred_closure_begin(Change), pred_closure_end(Change));
if (FailedTestsCache.count(Extended))
return false;
@@ -340,9 +333,8 @@ DAGDeltaAlgorithmImpl::Run() {
// Replace the current set with the predecssors of the minimized set of
// active changes.
CurrentSet.clear();
- for (changeset_ty::const_iterator it = CurrentMinSet.begin(),
- ie = CurrentMinSet.end(); it != ie; ++it)
- CurrentSet.insert(pred_begin(*it), pred_end(*it));
+ for (change_ty CT : CurrentMinSet)
+ CurrentSet.insert(pred_begin(CT), pred_end(CT));
// FIXME: We could enforce CurrentSet intersect Required == {} here if we
// wanted to protect against cyclic graphs.
diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp
index 6aee69f43405..a2017a10ab3f 100644
--- a/llvm/lib/Support/DeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -57,9 +57,8 @@ DeltaAlgorithm::Delta(const changeset_ty &Changes,
// Otherwise, partition the sets if possible; if not we are done.
changesetlist_ty SplitSets;
- for (changesetlist_ty::const_iterator it = Sets.begin(),
- ie = Sets.end(); it != ie; ++it)
- Split(*it, SplitSets);
+ for (const changeset_ty &Set : Sets)
+ Split(Set, SplitSets);
if (SplitSets.size() == Sets.size())
return Changes;
diff --git a/llvm/lib/Support/HTTPClient.cpp b/llvm/lib/Support/HTTPClient.cpp
deleted file mode 100644
index 68ba56d1fe50..000000000000
--- a/llvm/lib/Support/HTTPClient.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- llvm/Support/HTTPClient.cpp - HTTP client library -------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-///
-/// This file defines the methods of the HTTPRequest, HTTPClient, and
-/// BufferedHTTPResponseHandler classes.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/HTTPClient.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Errc.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-using namespace llvm;
-
-HTTPRequest::HTTPRequest(StringRef Url) { this->Url = Url.str(); }
-
-bool operator==(const HTTPRequest &A, const HTTPRequest &B) {
- return A.Url == B.Url && A.Method == B.Method &&
- A.FollowRedirects == B.FollowRedirects;
-}
-
-HTTPResponseHandler::~HTTPResponseHandler() = default;
-
-static inline bool parseContentLengthHeader(StringRef LineRef,
- size_t &ContentLength) {
- // Content-Length is a mandatory header, and the only one we handle.
- return LineRef.consume_front("Content-Length: ") &&
- to_integer(LineRef.trim(), ContentLength, 10);
-}
-
-Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) {
- if (ResponseBuffer.Body)
- return Error::success();
-
- size_t ContentLength;
- if (parseContentLengthHeader(HeaderLine, ContentLength))
- ResponseBuffer.Body =
- WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength);
-
- return Error::success();
-}
-
-Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
- if (!ResponseBuffer.Body)
- return createStringError(errc::io_error,
- "Unallocated response buffer. HTTP Body data "
- "received before Content-Length header.");
- if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize())
- return createStringError(errc::io_error,
- "Content size exceeds buffer size.");
- memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(),
- BodyChunk.size());
- Offset += BodyChunk.size();
- return Error::success();
-}
-
-Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) {
- ResponseBuffer.Code = Code;
- return Error::success();
-}
-
-Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) {
- BufferedHTTPResponseHandler Handler;
- if (Error Err = perform(Request, Handler))
- return std::move(Err);
- return std::move(Handler.ResponseBuffer);
-}
-
-Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) {
- HTTPRequest Request(Url);
- return perform(Request);
-}
-
-HTTPClient::HTTPClient() = default;
-
-HTTPClient::~HTTPClient() = default;
-
-bool HTTPClient::isAvailable() { return false; }
-
-void HTTPClient::cleanup() {}
-
-void HTTPClient::setTimeout(std::chrono::milliseconds Timeout) {}
-
-Error HTTPClient::perform(const HTTPRequest &Request,
- HTTPResponseHandler &Handler) {
- llvm_unreachable("No HTTP Client implementation available.");
-}
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 554e3248524c..8e154067abc0 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -420,11 +420,19 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
assert((!SelfMultiply || (LHS.One == RHS.One && LHS.Zero == RHS.Zero)) &&
"Self multiplication knownbits mismatch");
- // Compute a conservative estimate for high known-0 bits.
- unsigned LHSLeadZ = LHS.countMinLeadingZeros();
- unsigned RHSLeadZ = RHS.countMinLeadingZeros();
- unsigned LeadZ = std::max(LHSLeadZ + RHSLeadZ, BitWidth) - BitWidth;
- assert(LeadZ <= BitWidth && "More zeros than bits?");
+ // Compute the high known-0 bits by multiplying the unsigned max of each side.
+ // Conservatively, M active bits * N active bits results in M + N bits in the
+ // result. But if we know a value is a power-of-2 for example, then this
+ // computes one more leading zero.
+ // TODO: This could be generalized to number of sign bits (negative numbers).
+ APInt UMaxLHS = LHS.getMaxValue();
+ APInt UMaxRHS = RHS.getMaxValue();
+
+ // For leading zeros in the result to be valid, the unsigned max product must
+ // fit in the bitwidth (it must not overflow).
+ bool HasOverflow;
+ APInt UMaxResult = UMaxLHS.umul_ov(UMaxRHS, HasOverflow);
+ unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countLeadingZeros();
// The result of the bottom bits of an integer multiply can be
// inferred by looking at the bottom bits of both operands and
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index bcf13d828a5d..d3fa3c6f065d 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -227,17 +227,20 @@ static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
const ssize_t ChunkSize = 4096*4;
SmallString<ChunkSize> Buffer;
+
// Read into Buffer until we hit EOF.
+ size_t Size = Buffer.size();
for (;;) {
- Buffer.reserve(Buffer.size() + ChunkSize);
+ Buffer.resize_for_overwrite(Size + ChunkSize);
Expected<size_t> ReadBytes = sys::fs::readNativeFile(
- FD, makeMutableArrayRef(Buffer.end(), ChunkSize));
+ FD, makeMutableArrayRef(Buffer.begin() + Size, ChunkSize));
if (!ReadBytes)
return errorToErrorCode(ReadBytes.takeError());
if (*ReadBytes == 0)
break;
- Buffer.set_size(Buffer.size() + *ReadBytes);
+ Size += *ReadBytes;
}
+ Buffer.truncate(Size);
return getMemBufferCopyImpl(Buffer, BufferName);
}
diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp
index ae9f03745850..254d18d797b3 100644
--- a/llvm/lib/Support/NativeFormatting.cpp
+++ b/llvm/lib/Support/NativeFormatting.cpp
@@ -168,7 +168,7 @@ void llvm::write_double(raw_ostream &S, double N, FloatStyle Style,
S << "nan";
return;
} else if (std::isinf(N)) {
- S << "INF";
+ S << (std::signbit(N) ? "-INF" : "INF");
return;
}
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index 3957547dfaaa..7c99d088911c 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -474,7 +474,7 @@ StringRef parent_path(StringRef path, Style style) {
void remove_filename(SmallVectorImpl<char> &path, Style style) {
size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()), style);
if (end_pos != StringRef::npos)
- path.set_size(end_pos);
+ path.truncate(end_pos);
}
void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
@@ -486,7 +486,7 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
// Erase existing extension.
size_t pos = p.find_last_of('.');
if (pos != StringRef::npos && pos >= filename_pos(p, style))
- path.set_size(pos);
+ path.truncate(pos);
// Append '.' if needed.
if (ext.size() > 0 && ext[0] != '.')
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 8e984002f90d..e2e4340f44e9 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -61,7 +61,6 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
{"zbs", RISCVExtensionVersion{1, 0}},
{"zbt", RISCVExtensionVersion{0, 93}},
- {"zvamo", RISCVExtensionVersion{0, 10}},
{"zvlsseg", RISCVExtensionVersion{0, 10}},
{"zfhmin", RISCVExtensionVersion{0, 1}},
@@ -72,6 +71,28 @@ static bool stripExperimentalPrefix(StringRef &Ext) {
return Ext.consume_front("experimental-");
}
+// This function finds the first character that doesn't belong to a version
+// (e.g. zbe0p93 is extension 'zbe' of version '0p93'). So the function will
+// consume [0-9]*p[0-9]* starting from the backward. An extension name will not
+// end with a digit or the letter 'p', so this function will parse correctly.
+// NOTE: This function is NOT able to take empty strings or strings that only
+// have version numbers and no extension name. It assumes the extension name
+// will be at least more than one character.
+static size_t findFirstNonVersionCharacter(const StringRef &Ext) {
+ if (Ext.size() == 0)
+ llvm_unreachable("Already guarded by if-statement in ::parseArchString");
+
+ int Pos = Ext.size() - 1;
+ while (Pos > 0 && isDigit(Ext[Pos]))
+ Pos--;
+ if (Pos > 0 && Ext[Pos] == 'p' && isDigit(Ext[Pos - 1])) {
+ Pos--;
+ while (Pos > 0 && isDigit(Ext[Pos]))
+ Pos--;
+ }
+ return Pos;
+}
+
struct FindByName {
FindByName(StringRef Ext) : Ext(Ext){};
StringRef Ext;
@@ -264,10 +285,6 @@ void RISCVISAInfo::toFeatures(
if (ExtName == "zvlsseg") {
Features.push_back("+experimental-v");
Features.push_back("+experimental-zvlsseg");
- } else if (ExtName == "zvamo") {
- Features.push_back("+experimental-v");
- Features.push_back("+experimental-zvlsseg");
- Features.push_back("+experimental-zvamo");
} else if (isExperimentalExtension(ExtName)) {
Features.push_back(StrAlloc("+experimental-" + ExtName));
} else {
@@ -390,7 +407,6 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
assert(XLen == 32 || XLen == 64);
std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
- bool HasE = false;
for (auto &Feature : Features) {
StringRef ExtName = Feature;
bool Experimental = false;
@@ -409,29 +425,19 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
if (ExtensionInfoIterator == ExtensionInfos.end())
continue;
- if (Add) {
- if (ExtName == "e") {
- if (XLen != 32)
- return createStringError(
- errc::invalid_argument,
- "standard user-level extension 'e' requires 'rv32'");
- HasE = true;
- }
-
+ if (Add)
ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version.Major,
ExtensionInfoIterator->Version.Minor);
- } else
- ISAInfo->Exts.erase(ExtName.str());
- }
- if (!HasE) {
- if (auto Version = findDefaultVersion("i"))
- ISAInfo->addExtension("i", Version->Major, Version->Minor);
else
- llvm_unreachable("Default extension version for 'i' not found?");
+ ISAInfo->Exts.erase(ExtName.str());
}
+ ISAInfo->updateImplication();
ISAInfo->updateFLen();
+ if (Error Result = ISAInfo->checkDependency())
+ return std::move(Result);
+
return std::move(ISAInfo);
}
@@ -457,7 +463,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
// The canonical order specified in ISA manual.
// Ref: Table 22.1 in RISC-V User-Level ISA V2.2
StringRef StdExts = AllStdExts;
- bool HasF = false, HasD = false;
char Baseline = Arch[4];
// First letter should be 'e', 'i' or 'g'.
@@ -478,8 +483,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
case 'g':
// g = imafd
StdExts = StdExts.drop_front(4);
- HasF = true;
- HasD = true;
break;
}
@@ -560,34 +563,14 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
// The order is OK, then push it into features.
// TODO: Use version number when setting target features
- switch (C) {
- default:
- // Currently LLVM supports only "mafdcbv".
+ // Currently LLVM supports only "mafdcbv".
+ StringRef SupportedStandardExtension = "mafdcbv";
+ if (!SupportedStandardExtension.contains(C))
return createStringError(errc::invalid_argument,
"unsupported standard user-level extension '%c'",
C);
- case 'm':
- ISAInfo->addExtension("m", Major, Minor);
- break;
- case 'a':
- ISAInfo->addExtension("a", Major, Minor);
- break;
- case 'f':
- ISAInfo->addExtension("f", Major, Minor);
- HasF = true;
- break;
- case 'd':
- ISAInfo->addExtension("d", Major, Minor);
- HasD = true;
- break;
- case 'c':
- ISAInfo->addExtension("c", Major, Minor);
- break;
- case 'v':
- ISAInfo->addExtension("v", Major, Minor);
- ISAInfo->addExtension("zvlsseg", Major, Minor);
- break;
- }
+ ISAInfo->addExtension(std::string(1, C), Major, Minor);
+
// Consume full extension name and version, including any optional '_'
// between this extension and the next
++I;
@@ -595,21 +578,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
if (*I == '_')
++I;
}
- // Dependency check.
- // It's illegal to specify the 'd' (double-precision floating point)
- // extension without also specifying the 'f' (single precision
- // floating-point) extension.
- // TODO: This has been removed in later specs, which specify that D implies F
- if (HasD && !HasF)
- return createStringError(errc::invalid_argument,
- "d requires f extension to also be specified");
-
- // Additional dependency checks.
- // TODO: The 'q' extension requires rv64.
- // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
-
- if (OtherExts.empty())
- return std::move(ISAInfo);
// Handle other types of extensions other than the standard
// general purpose and standard user-level extensions.
@@ -630,52 +598,53 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
std::array<StringRef, 4> Prefix{"z", "x", "s", "sx"};
auto I = Prefix.begin();
auto E = Prefix.end();
+ if (Split.size() > 1 || Split[0] != "") {
+ for (StringRef Ext : Split) {
+ if (Ext.empty())
+ return createStringError(errc::invalid_argument,
+ "extension name missing after separator '_'");
- for (StringRef Ext : Split) {
- if (Ext.empty())
- return createStringError(errc::invalid_argument,
- "extension name missing after separator '_'");
+ StringRef Type = getExtensionType(Ext);
+ StringRef Desc = getExtensionTypeDesc(Ext);
+ auto Pos = findFirstNonVersionCharacter(Ext) + 1;
+ StringRef Name(Ext.substr(0, Pos));
+ StringRef Vers(Ext.substr(Pos));
- StringRef Type = getExtensionType(Ext);
- StringRef Desc = getExtensionTypeDesc(Ext);
- auto Pos = Ext.find_if(isDigit);
- StringRef Name(Ext.substr(0, Pos));
- StringRef Vers(Ext.substr(Pos));
+ if (Type.empty())
+ return createStringError(errc::invalid_argument,
+ "invalid extension prefix '" + Ext + "'");
- if (Type.empty())
- return createStringError(errc::invalid_argument,
- "invalid extension prefix '" + Ext + "'");
+ // Check ISA extensions are specified in the canonical order.
+ while (I != E && *I != Type)
+ ++I;
- // Check ISA extensions are specified in the canonical order.
- while (I != E && *I != Type)
- ++I;
+ if (I == E)
+ return createStringError(errc::invalid_argument,
+ "%s not given in canonical order '%s'",
+ Desc.str().c_str(), Ext.str().c_str());
- if (I == E)
- return createStringError(errc::invalid_argument,
- "%s not given in canonical order '%s'",
- Desc.str().c_str(), Ext.str().c_str());
-
- if (Name.size() == Type.size()) {
- return createStringError(errc::invalid_argument,
- "%s name missing after '%s'", Desc.str().c_str(),
- Type.str().c_str());
- }
+ if (Name.size() == Type.size()) {
+ return createStringError(errc::invalid_argument,
+ "%s name missing after '%s'",
+ Desc.str().c_str(), Type.str().c_str());
+ }
- unsigned Major, Minor, ConsumeLength;
- if (auto E = getExtensionVersion(Name, Vers, Major, Minor, ConsumeLength,
- EnableExperimentalExtension,
- ExperimentalExtensionVersionCheck))
- return std::move(E);
+ unsigned Major, Minor, ConsumeLength;
+ if (auto E = getExtensionVersion(Name, Vers, Major, Minor, ConsumeLength,
+ EnableExperimentalExtension,
+ ExperimentalExtensionVersionCheck))
+ return std::move(E);
- // Check if duplicated extension.
- if (llvm::is_contained(AllExts, Name))
- return createStringError(errc::invalid_argument, "duplicated %s '%s'",
- Desc.str().c_str(), Name.str().c_str());
+ // Check if duplicated extension.
+ if (llvm::is_contained(AllExts, Name))
+ return createStringError(errc::invalid_argument, "duplicated %s '%s'",
+ Desc.str().c_str(), Name.str().c_str());
- ISAInfo->addExtension(Name, Major, Minor);
- // Extension format is correct, keep parsing the extensions.
- // TODO: Save Type, Name, Major, Minor to avoid parsing them later.
- AllExts.push_back(Name);
+ ISAInfo->addExtension(Name, Major, Minor);
+ // Extension format is correct, keep parsing the extensions.
+ // TODO: Save Type, Name, Major, Minor to avoid parsing them later.
+ AllExts.push_back(Name);
+ }
}
for (auto Ext : AllExts) {
@@ -686,11 +655,83 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
}
}
+ ISAInfo->updateImplication();
ISAInfo->updateFLen();
+ if (Error Result = ISAInfo->checkDependency())
+ return std::move(Result);
+
return std::move(ISAInfo);
}
+Error RISCVISAInfo::checkDependency() {
+ bool IsRv32 = XLen == 32;
+ bool HasE = Exts.count("e") == 1;
+ bool HasD = Exts.count("d") == 1;
+ bool HasF = Exts.count("f") == 1;
+
+ if (HasE && !IsRv32)
+ return createStringError(
+ errc::invalid_argument,
+ "standard user-level extension 'e' requires 'rv32'");
+
+ // It's illegal to specify the 'd' (double-precision floating point)
+ // extension without also specifying the 'f' (single precision
+ // floating-point) extension.
+ // TODO: This has been removed in later specs, which specify that D implies F
+ if (HasD && !HasF)
+ return createStringError(errc::invalid_argument,
+ "d requires f extension to also be specified");
+
+ // Additional dependency checks.
+ // TODO: The 'q' extension requires rv64.
+ // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
+
+ return Error::success();
+}
+
+static const char *ImpliedExtsV[] = {"zvlsseg"};
+static const char *ImpliedExtsZfh[] = {"zfhmin"};
+
+struct ImpliedExtsEntry {
+ StringLiteral Name;
+ ArrayRef<const char *> Exts;
+
+ bool operator<(const ImpliedExtsEntry &Other) const {
+ return Name < Other.Name;
+ }
+
+ bool operator<(StringRef Other) const { return Name < Other; }
+};
+
+static constexpr ImpliedExtsEntry ImpliedExts[] = {
+ {{"v"}, {ImpliedExtsV}},
+ {{"zfh"}, {ImpliedExtsZfh}},
+};
+
+void RISCVISAInfo::updateImplication() {
+ bool HasE = Exts.count("e") == 1;
+ bool HasI = Exts.count("i") == 1;
+
+ // If not in e extension and i extension does not exist, i extension is
+ // implied
+ if (!HasE && !HasI) {
+ auto Version = findDefaultVersion("i");
+ addExtension("i", Version->Major, Version->Minor);
+ }
+
+ assert(llvm::is_sorted(ImpliedExts) && "Table not sorted by Name");
+ for (auto &Ext : Exts) {
+ auto I = llvm::lower_bound(ImpliedExts, Ext.first);
+ if (I != std::end(ImpliedExts) && I->Name == Ext.first) {
+ for (auto &ImpliedExt : I->Exts) {
+ auto Version = findDefaultVersion(ImpliedExt);
+ addExtension(ImpliedExt, Version->Major, Version->Minor);
+ }
+ }
+ }
+}
+
void RISCVISAInfo::updateFLen() {
FLen = 0;
// TODO: Handle q extension.
diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp
index 779c6c45257d..ea90a24eaced 100644
--- a/llvm/lib/Support/ScopedPrinter.cpp
+++ b/llvm/lib/Support/ScopedPrinter.cpp
@@ -43,4 +43,14 @@ void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str,
}
}
+JSONScopedPrinter::JSONScopedPrinter(
+ raw_ostream &OS, bool PrettyPrint,
+ std::unique_ptr<DelimitedScope> &&OuterScope)
+ : ScopedPrinter(OS, ScopedPrinter::ScopedPrinterKind::JSON),
+ JOS(OS, /*Indent=*/PrettyPrint ? 2 : 0),
+ OuterScope(std::move(OuterScope)) {
+ if (this->OuterScope)
+ this->OuterScope->setPrinter(*this);
+}
+
} // namespace llvm
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index dd4dded4cd1d..c018dc92bf40 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -87,8 +87,7 @@ static CallbackAndCookie CallBacksToRun[MaxSignalHandlerCallbacks];
// Signal-safe.
void sys::RunSignalHandlers() {
- for (size_t I = 0; I < MaxSignalHandlerCallbacks; ++I) {
- auto &RunMe = CallBacksToRun[I];
+ for (CallbackAndCookie &RunMe : CallBacksToRun) {
auto Expected = CallbackAndCookie::Status::Initialized;
auto Desired = CallbackAndCookie::Status::Executing;
if (!RunMe.Flag.compare_exchange_strong(Expected, Desired))
@@ -103,8 +102,7 @@ void sys::RunSignalHandlers() {
// Signal-safe.
static void insertSignalHandler(sys::SignalHandlerCallback FnPtr,
void *Cookie) {
- for (size_t I = 0; I < MaxSignalHandlerCallbacks; ++I) {
- auto &SetMe = CallBacksToRun[I];
+ for (CallbackAndCookie &SetMe : CallBacksToRun) {
auto Expected = CallbackAndCookie::Status::Empty;
auto Desired = CallbackAndCookie::Status::Initializing;
if (!SetMe.Flag.compare_exchange_strong(Expected, Desired))
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index 89b7dc939dfc..2eb2989b200b 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -292,8 +292,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
// Convert any ranges to column ranges that only intersect the line of the
// location.
- for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
- SMRange R = Ranges[i];
+ for (SMRange R : Ranges) {
if (!R.isValid())
continue;
diff --git a/llvm/lib/Support/Statistic.cpp b/llvm/lib/Support/Statistic.cpp
index d95c8642c16e..95ee885d2f8f 100644
--- a/llvm/lib/Support/Statistic.cpp
+++ b/llvm/lib/Support/Statistic.cpp
@@ -177,11 +177,10 @@ void llvm::PrintStatistics(raw_ostream &OS) {
// Figure out how long the biggest Value and Name fields are.
unsigned MaxDebugTypeLen = 0, MaxValLen = 0;
- for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
- MaxValLen = std::max(MaxValLen,
- (unsigned)utostr(Stats.Stats[i]->getValue()).size());
- MaxDebugTypeLen = std::max(MaxDebugTypeLen,
- (unsigned)std::strlen(Stats.Stats[i]->getDebugType()));
+ for (TrackingStatistic *Stat : Stats.Stats) {
+ MaxValLen = std::max(MaxValLen, (unsigned)utostr(Stat->getValue()).size());
+ MaxDebugTypeLen =
+ std::max(MaxDebugTypeLen, (unsigned)std::strlen(Stat->getDebugType()));
}
Stats.sort();
@@ -192,11 +191,9 @@ void llvm::PrintStatistics(raw_ostream &OS) {
<< "===" << std::string(73, '-') << "===\n\n";
// Print all of the statistics.
- for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i)
- OS << format("%*u %-*s - %s\n",
- MaxValLen, Stats.Stats[i]->getValue(),
- MaxDebugTypeLen, Stats.Stats[i]->getDebugType(),
- Stats.Stats[i]->getDesc());
+ for (TrackingStatistic *Stat : Stats.Stats)
+ OS << format("%*u %-*s - %s\n", MaxValLen, Stat->getValue(),
+ MaxDebugTypeLen, Stat->getDebugType(), Stat->getDesc());
OS << '\n'; // Flush the output stream.
OS.flush();
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index 4acc23dd455b..bc60bdea5f62 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -331,6 +331,21 @@ bool getCPUFeaturesExceptStdExt(CPUKind Kind,
return true;
}
+StringRef computeDefaultABIFromArch(const llvm::RISCVISAInfo &ISAInfo) {
+ if (ISAInfo.getXLen() == 32) {
+ if (ISAInfo.hasExtension("d"))
+ return "ilp32d";
+ if (ISAInfo.hasExtension("e"))
+ return "ilp32e";
+ return "ilp32";
+ } else if (ISAInfo.getXLen() == 64) {
+ if (ISAInfo.hasExtension("d"))
+ return "lp64d";
+ return "lp64";
+ }
+ llvm_unreachable("Invalid XLEN");
+}
+
} // namespace RISCV
} // namespace llvm
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index c11e16d3cf98..54ea84d4bd6d 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -21,13 +21,17 @@ using namespace llvm;
#if LLVM_ENABLE_THREADS
ThreadPool::ThreadPool(ThreadPoolStrategy S)
- : ThreadCount(S.compute_thread_count()) {
- // Create ThreadCount threads that will loop forever, wait on QueueCondition
- // for tasks to be queued or the Pool to be destroyed.
- Threads.reserve(ThreadCount);
- for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) {
- Threads.emplace_back([S, ThreadID, this] {
- S.apply_thread_strategy(ThreadID);
+ : Strategy(S), MaxThreadCount(S.compute_thread_count()) {}
+
+void ThreadPool::grow(int requested) {
+ std::unique_lock<std::mutex> LockGuard(ThreadsLock);
+ if (Threads.size() >= MaxThreadCount)
+ return; // Already hit the max thread pool size.
+ int newThreadCount = std::min<int>(requested, MaxThreadCount);
+ while (static_cast<int>(Threads.size()) < newThreadCount) {
+ int ThreadID = Threads.size();
+ Threads.emplace_back([this, ThreadID] {
+ Strategy.apply_thread_strategy(ThreadID);
while (true) {
std::function<void()> Task;
{
@@ -73,6 +77,7 @@ void ThreadPool::wait() {
}
bool ThreadPool::isWorkerThread() const {
+ std::unique_lock<std::mutex> LockGuard(ThreadsLock);
llvm::thread::id CurrentThreadId = llvm::this_thread::get_id();
for (const llvm::thread &Thread : Threads)
if (CurrentThreadId == Thread.get_id())
@@ -87,6 +92,7 @@ ThreadPool::~ThreadPool() {
EnableFlag = false;
}
QueueCondition.notify_all();
+ std::unique_lock<std::mutex> LockGuard(ThreadsLock);
for (auto &Worker : Threads)
Worker.join();
}
@@ -94,8 +100,8 @@ ThreadPool::~ThreadPool() {
#else // LLVM_ENABLE_THREADS Disabled
// No threads are launched, issue a warning if ThreadCount is not 0
-ThreadPool::ThreadPool(ThreadPoolStrategy S)
- : ThreadCount(S.compute_thread_count()) {
+ThreadPool::ThreadPool(ThreadPoolStrategy S) : MaxThreadCount(1) {
+ int ThreadCount = S.compute_thread_count();
if (ThreadCount != 1) {
errs() << "Warning: request a ThreadPool with " << ThreadCount
<< " threads, but LLVM_ENABLE_THREADS has been turned off\n";
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index b9a92e280576..2819dc0c139a 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -989,10 +989,9 @@ std::string Triple::normalize(StringRef Str) {
}
// Replace empty components with "unknown" value.
- for (unsigned i = 0, e = Components.size(); i < e; ++i) {
- if (Components[i].empty())
- Components[i] = "unknown";
- }
+ for (StringRef &C : Components)
+ if (C.empty())
+ C = "unknown";
// Special case logic goes here. At this point Arch, Vendor and OS have the
// correct values for the computed components.
@@ -1091,53 +1090,22 @@ StringRef Triple::getOSAndEnvironmentName() const {
return Tmp.split('-').second; // Strip second component
}
-static unsigned EatNumber(StringRef &Str) {
- assert(!Str.empty() && isDigit(Str[0]) && "Not a number");
- unsigned Result = 0;
-
- do {
- // Consume the leading digit.
- Result = Result*10 + (Str[0] - '0');
-
- // Eat the digit.
- Str = Str.substr(1);
- } while (!Str.empty() && isDigit(Str[0]));
-
- return Result;
-}
-
-static void parseVersionFromName(StringRef Name, unsigned &Major,
- unsigned &Minor, unsigned &Micro) {
- // Any unset version defaults to 0.
- Major = Minor = Micro = 0;
-
- // Parse up to three components.
- unsigned *Components[3] = {&Major, &Minor, &Micro};
- for (unsigned i = 0; i != 3; ++i) {
- if (Name.empty() || Name[0] < '0' || Name[0] > '9')
- break;
-
- // Consume the leading number.
- *Components[i] = EatNumber(Name);
-
- // Consume the separator, if present.
- if (Name.startswith("."))
- Name = Name.substr(1);
- }
+static VersionTuple parseVersionFromName(StringRef Name) {
+ VersionTuple Version;
+ Version.tryParse(Name);
+ return Version.withoutBuild();
}
-void Triple::getEnvironmentVersion(unsigned &Major, unsigned &Minor,
- unsigned &Micro) const {
+VersionTuple Triple::getEnvironmentVersion() const {
StringRef EnvironmentName = getEnvironmentName();
StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment());
if (EnvironmentName.startswith(EnvironmentTypeName))
EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size());
- parseVersionFromName(EnvironmentName, Major, Minor, Micro);
+ return parseVersionFromName(EnvironmentName);
}
-void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
- unsigned &Micro) const {
+VersionTuple Triple::getOSVersion() const {
StringRef OSName = getOSName();
// Assume that the OS portion of the triple starts with the canonical name.
StringRef OSTypeName = getOSTypeName(getOS());
@@ -1146,40 +1114,36 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
else if (getOS() == MacOSX)
OSName.consume_front("macos");
- parseVersionFromName(OSName, Major, Minor, Micro);
+ return parseVersionFromName(OSName);
}
-bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
- unsigned &Micro) const {
- getOSVersion(Major, Minor, Micro);
+bool Triple::getMacOSXVersion(VersionTuple &Version) const {
+ Version = getOSVersion();
switch (getOS()) {
default: llvm_unreachable("unexpected OS for Darwin triple");
case Darwin:
// Default to darwin8, i.e., MacOSX 10.4.
- if (Major == 0)
- Major = 8;
+ if (Version.getMajor() == 0)
+ Version = VersionTuple(8);
// Darwin version numbers are skewed from OS X versions.
- if (Major < 4)
+ if (Version.getMajor() < 4) {
return false;
- if (Major <= 19) {
- Micro = 0;
- Minor = Major - 4;
- Major = 10;
+ }
+ if (Version.getMajor() <= 19) {
+ Version = VersionTuple(10, Version.getMajor() - 4);
} else {
- Micro = 0;
- Minor = 0;
// darwin20+ corresponds to macOS 11+.
- Major = 11 + Major - 20;
+ Version = VersionTuple(11 + Version.getMajor() - 20);
}
break;
case MacOSX:
// Default to 10.4.
- if (Major == 0) {
- Major = 10;
- Minor = 4;
- } else if (Major < 10)
+ if (Version.getMajor() == 0) {
+ Version = VersionTuple(10, 4);
+ } else if (Version.getMajor() < 10) {
return false;
+ }
break;
case IOS:
case TvOS:
@@ -1188,16 +1152,13 @@ bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
// the clang driver combines OS X and IOS support into a common Darwin
// toolchain that wants to know the OS X version number even when targeting
// IOS.
- Major = 10;
- Minor = 4;
- Micro = 0;
+ Version = VersionTuple(10, 4);
break;
}
return true;
}
-void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
- unsigned &Micro) const {
+VersionTuple Triple::getiOSVersion() const {
switch (getOS()) {
default: llvm_unreachable("unexpected OS for Darwin triple");
case Darwin:
@@ -1206,24 +1167,21 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
// the clang driver combines OS X and IOS support into a common Darwin
// toolchain that wants to know the iOS version number even when targeting
// OS X.
- Major = 5;
- Minor = 0;
- Micro = 0;
- break;
+ return VersionTuple(5);
case IOS:
- case TvOS:
- getOSVersion(Major, Minor, Micro);
+ case TvOS: {
+ VersionTuple Version = getOSVersion();
// Default to 5.0 (or 7.0 for arm64).
- if (Major == 0)
- Major = (getArch() == aarch64) ? 7 : 5;
- break;
+ if (Version.getMajor() == 0)
+ return (getArch() == aarch64) ? VersionTuple(7) : VersionTuple(5);
+ return Version;
+ }
case WatchOS:
llvm_unreachable("conflicting triple info");
}
}
-void Triple::getWatchOSVersion(unsigned &Major, unsigned &Minor,
- unsigned &Micro) const {
+VersionTuple Triple::getWatchOSVersion() const {
switch (getOS()) {
default: llvm_unreachable("unexpected OS for Darwin triple");
case Darwin:
@@ -1232,15 +1190,13 @@ void Triple::getWatchOSVersion(unsigned &Major, unsigned &Minor,
// the clang driver combines OS X and IOS support into a common Darwin
// toolchain that wants to know the iOS version number even when targeting
// OS X.
- Major = 2;
- Minor = 0;
- Micro = 0;
- break;
- case WatchOS:
- getOSVersion(Major, Minor, Micro);
- if (Major == 0)
- Major = 2;
- break;
+ return VersionTuple(2);
+ case WatchOS: {
+ VersionTuple Version = getOSVersion();
+ if (Version.getMajor() == 0)
+ return VersionTuple(2);
+ return Version;
+ }
case IOS:
llvm_unreachable("conflicting triple info");
}
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 19d89db55627..f5cb5895d95d 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -590,19 +590,6 @@ std::error_code rename(const Twine &from, const Twine &to) {
}
std::error_code resize_file(int FD, uint64_t Size) {
-#if defined(HAVE_POSIX_FALLOCATE)
- // If we have posix_fallocate use it. Unlike ftruncate it always allocates
- // space, so we get an error if the disk is full.
- if (int Err = ::posix_fallocate(FD, 0, Size)) {
-#ifdef _AIX
- constexpr int NotSupportedError = ENOTSUP;
-#else
- constexpr int NotSupportedError = EOPNOTSUPP;
-#endif
- if (Err != EINVAL && Err != NotSupportedError)
- return std::error_code(Err, std::generic_category());
- }
-#endif
// Use ftruncate as a fallback. It may or may not allocate space. At least on
// OS X with HFS+ it does.
if (::ftruncate(FD, Size) == -1)
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 9bf0384b5f1b..bec4e8dbe06c 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -75,6 +75,12 @@ Status::Status(const Twine &Name, UniqueID UID, sys::TimePoint<> MTime,
: Name(Name.str()), UID(UID), MTime(MTime), User(User), Group(Group),
Size(Size), Type(Type), Perms(Perms) {}
+Status Status::copyWithNewSize(const Status &In, uint64_t NewSize) {
+ return Status(In.getName(), In.getUniqueID(), In.getLastModificationTime(),
+ In.getUser(), In.getGroup(), NewSize, In.getType(),
+ In.getPermissions());
+}
+
Status Status::copyWithNewName(const Status &In, const Twine &NewName) {
return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
In.getUser(), In.getGroup(), In.getSize(), In.getType(),
diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index f68ba0d065c1..2adf37a511d1 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -1876,8 +1876,8 @@ document_iterator Stream::end() {
}
void Stream::skip() {
- for (document_iterator i = begin(), e = end(); i != e; ++i)
- i->skip();
+ for (Document &Doc : *this)
+ Doc.skip();
}
Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
diff --git a/llvm/lib/TableGen/StringMatcher.cpp b/llvm/lib/TableGen/StringMatcher.cpp
index 7f30c7b60752..7474c5dfe885 100644
--- a/llvm/lib/TableGen/StringMatcher.cpp
+++ b/llvm/lib/TableGen/StringMatcher.cpp
@@ -32,8 +32,8 @@ FindFirstNonCommonLetter(const std::vector<const
// Check to see if letter i is the same across the set.
char Letter = Matches[0]->first[i];
- for (unsigned str = 0, e = Matches.size(); str != e; ++str)
- if (Matches[str]->first[i] != Letter)
+ for (const StringMatcher::StringPair *Match : Matches)
+ if (Match->first[i] != Letter)
return i;
}
@@ -75,9 +75,8 @@ bool StringMatcher::EmitStringMatcherForChar(
// Bucket the matches by the character we are comparing.
std::map<char, std::vector<const StringPair*>> MatchesByLetter;
- for (unsigned i = 0, e = Matches.size(); i != e; ++i)
- MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
-
+ for (const StringPair *Match : Matches)
+ MatchesByLetter[Match->first[CharNo]].push_back(Match);
// If we have exactly one bucket to match, see how many characters are common
// across the whole set and match all of them at once.
@@ -135,8 +134,8 @@ void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const {
// First level categorization: group strings by length.
std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;
- for (unsigned i = 0, e = Matches.size(); i != e; ++i)
- MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
+ for (const StringPair &Match : Matches)
+ MatchesByLength[Match.first.size()].push_back(&Match);
// Output a switch statement on length and categorize the elements within each
// bin.
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 548e4e0c9389..cb17fd94c335 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -455,6 +455,9 @@ def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true",
def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
"Enable Exception Level 3">;
+def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769",
+ "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
diff --git a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 7fd51a98ad94..4cdf5f144437 100644
--- a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -116,8 +117,13 @@ INITIALIZE_PASS(AArch64A53Fix835769, "aarch64-fix-cortex-a53-835769-pass",
bool
AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
LLVM_DEBUG(dbgs() << "***** AArch64A53Fix835769 *****\n");
+ auto &STI = F.getSubtarget<AArch64Subtarget>();
+ // Fix not requested, skip pass.
+ if (!STI.fixCortexA53_835769())
+ return false;
+
bool Changed = false;
- TII = F.getSubtarget().getInstrInfo();
+ TII = STI.getInstrInfo();
for (auto &MBB : F) {
Changed |= runOnBasicBlock(MBB);
diff --git a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index cd67e058a9c1..9e31243cd696 100644
--- a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -398,8 +398,8 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
TII = mf.getSubtarget().getInstrInfo();
// Just check things on a one-block-at-a-time basis.
- for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
- if (processMachineBasicBlock(&*I))
+ for (MachineBasicBlock &MBB : mf)
+ if (processMachineBasicBlock(&MBB))
Changed = true;
return Changed;
}
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index aeebb49675b2..85a9c04a3fef 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -73,6 +73,7 @@ class AArch64AsmPrinter : public AsmPrinter {
StackMaps SM;
FaultMaps FM;
const AArch64Subtarget *STI;
+ bool ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = false;
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
@@ -186,6 +187,10 @@ private:
using MInstToMCSymbol = std::map<const MachineInstr *, MCSymbol *>;
MInstToMCSymbol LOHInstToLabel;
+
+ bool shouldEmitWeakSwiftAsyncExtendedFramePointerFlags() const override {
+ return ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags;
+ }
};
} // end anonymous namespace
@@ -1132,6 +1137,15 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
+ if (MI->getOpcode() == AArch64::ADRP) {
+ for (auto &Opd : MI->operands()) {
+ if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
+ "swift_async_extendedFramePointerFlags") {
+ ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
+ }
+ }
+ }
+
if (AArch64FI->getLOHRelated().count(MI)) {
// Generate a label for LOH related instruction
MCSymbol *LOHLabel = createTempSymbol("loh");
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d2097f7e6ee3..1994e0eb7fb9 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -196,6 +196,13 @@ def mutate_anyext_to_zext : GICombineRule<
(apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
>;
+def split_store_zero_128 : GICombineRule<
+ (defs root:$d),
+ (match (wip_match_opcode G_STORE):$d,
+ [{ return matchSplitStoreZero128(*${d}, MRI); }]),
+ (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -220,6 +227,7 @@ def AArch64PostLegalizerCombinerHelper
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
constant_fold, identity_combines,
- ptr_add_immed_chain, overlapping_and]> {
+ ptr_add_immed_chain, overlapping_and,
+ split_store_zero_128]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index d98a5cfd4f50..4f324198f3dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -51,10 +51,9 @@ static bool tryToreplicateChunks(uint64_t UImm,
++Counts[getChunk(UImm, Idx)];
// Traverse the chunks to find one which occurs more than once.
- for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
- Chunk != End; ++Chunk) {
- const uint64_t ChunkVal = Chunk->first;
- const unsigned Count = Chunk->second;
+ for (const auto &Chunk : Counts) {
+ const uint64_t ChunkVal = Chunk.first;
+ const unsigned Count = Chunk.second;
uint64_t Encoding = 0;
diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 209f9f7255a5..793663ef97d7 100644
--- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -138,8 +138,8 @@ bool FalkorMarkStridedAccesses::run() {
bool MadeChange = false;
for (Loop *L : LI)
- for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt)
- MadeChange |= runOnLoop(**LIt);
+ for (Loop *LIt : depth_first(L))
+ MadeChange |= runOnLoop(*LIt);
return MadeChange;
}
@@ -828,10 +828,10 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
Modified = false;
for (MachineLoop *I : LI)
- for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
+ for (MachineLoop *L : depth_first(I))
// Only process inner-loops
if (L->isInnermost())
- runOnLoop(**L, Fn);
+ runOnLoop(*L, Fn);
return Modified;
}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b630f4f0df5f..638e45b30d99 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3041,10 +3041,21 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
// Create a buffer of SVE objects to allocate and sort it.
SmallVector<int, 8> ObjectsToAllocate;
+ // If we have a stack protector, and we've previously decided that we have SVE
+ // objects on the stack and thus need it to go in the SVE stack area, then it
+ // needs to go first.
+ int StackProtectorFI = -1;
+ if (MFI.hasStackProtectorIndex()) {
+ StackProtectorFI = MFI.getStackProtectorIndex();
+ if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
+ ObjectsToAllocate.push_back(StackProtectorFI);
+ }
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
unsigned StackID = MFI.getStackID(I);
if (StackID != TargetStackID::ScalableVector)
continue;
+ if (I == StackProtectorFI)
+ continue;
if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
continue;
if (MFI.isDeadObjectIndex(I))
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 72461aa1f772..e141179fb5c8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -204,6 +205,8 @@ static bool isMergePassthruOpcode(unsigned Opc) {
return false;
case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
+ case AArch64ISD::REVH_MERGE_PASSTHRU:
+ case AArch64ISD::REVW_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
@@ -2227,6 +2230,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
@@ -4213,6 +4218,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_revb:
return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_revh:
+ return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_revw:
+ return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
@@ -10958,16 +10969,15 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- if (InVT.isScalableVector()) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
+ SDValue Vec0 = Op.getOperand(0);
+ SDValue Vec1 = Op.getOperand(1);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ if (InVT.isScalableVector()) {
if (!isTypeLegal(VT))
return SDValue();
- SDValue Vec0 = Op.getOperand(0);
- SDValue Vec1 = Op.getOperand(1);
-
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
@@ -10997,9 +11007,18 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
return SDValue();
}
- // This will be matched by custom code during ISelDAGToDAG.
- if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
- return Op;
+ if (Idx == 0 && isPackedVectorType(VT, DAG)) {
+ // This will be matched by custom code during ISelDAGToDAG.
+ if (Vec0.isUndef())
+ return Op;
+
+ unsigned int PredPattern =
+ getSVEPredPatternFromNumElements(InVT.getVectorNumElements());
+ auto PredTy = VT.changeVectorElementType(MVT::i1);
+ SDValue PTrue = getPTrue(DAG, DL, PredTy, PredPattern);
+ SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);
+ return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);
+ }
return SDValue();
}
@@ -11794,6 +11813,9 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
Base.getOperand(1).getOpcode() == ISD::SHL &&
Base.getOperand(1).hasOneUse() &&
Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
+ // It's unknown whether a scalable vector has a power-of-2 bitwidth.
+ if (Mem->getMemoryVT().isScalableVector())
+ return false;
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
@@ -15820,6 +15842,23 @@ static SDValue performVectorShiftCombine(SDNode *N,
return SDValue();
}
+static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG) {
+ // sunpklo(sext(pred)) -> sext(extract_low_half(pred))
+ // This transform works in partnership with performSetCCPunpkCombine to
+ // remove unnecessary transfer of predicates into standard registers and back
+ if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
+ N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
+ MVT::i1) {
+ SDValue CC = N->getOperand(0)->getOperand(0);
+ auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
+ SDValue Unpk = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, CC,
+ DAG.getVectorIdxConstant(0, SDLoc(N)));
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
+ }
+
+ return SDValue();
+}
+
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
static SDValue performPostLD1Combine(SDNode *N,
@@ -15982,7 +16021,9 @@ static SDValue performSTORECombine(SDNode *N,
if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND &&
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
Subtarget->useSVEForFixedLengthVectors() &&
- Value.getValueType().isFixedLengthVector())
+ Value.getValueType().isFixedLengthVector() &&
+ Value.getValueType().getFixedSizeInBits() >=
+ Subtarget->getMinSVEVectorSizeInBits())
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemoryVT(), ST->getMemOperand());
@@ -16495,6 +16536,44 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
+ // setcc_merge_zero pred
+ // (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
+ // => extract_subvector (inner setcc_merge_zero)
+ SDValue Pred = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
+
+ if (Cond != ISD::SETNE || !isZerosVector(RHS.getNode()) ||
+ LHS->getOpcode() != ISD::SIGN_EXTEND)
+ return SDValue();
+
+ SDValue Extract = LHS->getOperand(0);
+ if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Extract->getValueType(0) != N->getValueType(0) ||
+ Extract->getConstantOperandVal(1) != 0)
+ return SDValue();
+
+ SDValue InnerSetCC = Extract->getOperand(0);
+ if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
+ return SDValue();
+
+ // By this point we've effectively got
+ // zero_inactive_lanes_and_trunc_i1(sext_i1(A)). If we can prove A's inactive
+ // lanes are already zero then the trunc(sext()) sequence is redundant and we
+ // can operate on A directly.
+ SDValue InnerPred = InnerSetCC.getOperand(0);
+ if (Pred.getOpcode() == AArch64ISD::PTRUE &&
+ InnerPred.getOpcode() == AArch64ISD::PTRUE &&
+ Pred.getConstantOperandVal(0) == InnerPred.getConstantOperandVal(0) &&
+ Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
+ Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
+ return Extract;
+
+ return SDValue();
+}
+
static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
"Unexpected opcode!");
@@ -16513,6 +16592,9 @@ static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
LHS->getOperand(0)->getOperand(0) == Pred)
return LHS->getOperand(0);
+ if (SDValue V = performSetCCPunpkCombine(N, DAG))
+ return V;
+
return SDValue();
}
@@ -17343,7 +17425,8 @@ SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
// they can be split down into something legal.
if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) &&
N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
- VT.isFixedLengthVector()) {
+ VT.isFixedLengthVector() &&
+ VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(), LN0->getBasePtr(),
@@ -17455,6 +17538,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
return performVectorShiftCombine(N, *this, DCI);
+ case AArch64ISD::SUNPKLO:
+ return performSunpkloCombine(N, DAG);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
@@ -18570,7 +18655,25 @@ AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
}
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
- MF.getFrameInfo().computeMaxCallFrameSize(MF);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ // If we have any vulnerable SVE stack objects then the stack protector
+ // needs to be placed at the top of the SVE stack area, as the SVE locals
+ // are placed above the other locals, so we allocate it as if it were a
+ // scalable vector.
+ // FIXME: It may be worthwhile having a specific interface for this rather
+ // than doing it here in finalizeLowering.
+ if (MFI.hasStackProtectorIndex()) {
+ for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.getStackID(i) == TargetStackID::ScalableVector &&
+ MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
+ MFI.setStackID(MFI.getStackProtectorIndex(),
+ TargetStackID::ScalableVector);
+ MFI.setObjectAlignment(MFI.getStackProtectorIndex(), Align(16));
+ break;
+ }
+ }
+ }
+ MFI.computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
@@ -18855,10 +18958,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
- auto Store = cast<MaskedStoreSDNode>(Op);
-
- if (Store->isTruncatingStore())
- return SDValue();
+ auto *Store = cast<MaskedStoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
@@ -19103,7 +19203,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
- return DAG.getNode(NewOp, DL, VT, Operands);
+ return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
}
// If a fixed length vector operation has no side effects when applied to
@@ -19498,6 +19598,94 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
return convertFromScalableVector(DAG, VT, Op);
}
+ for (unsigned LaneSize : {64U, 32U, 16U}) {
+ if (isREVMask(ShuffleMask, VT, LaneSize)) {
+ EVT NewVT =
+ getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
+ unsigned RevOp;
+ unsigned EltSz = VT.getScalarSizeInBits();
+ if (EltSz == 8)
+ RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
+ else if (EltSz == 16)
+ RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
+ else
+ RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
+
+ Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
+ Op = LowerToPredicatedOp(Op, DAG, RevOp);
+ Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
+ return convertFromScalableVector(DAG, VT, Op);
+ }
+ }
+
+ unsigned WhichResult;
+ if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));
+
+ if (isTRNMask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
+ }
+
+ if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1));
+
+ if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
+ }
+
+ // Functions like isZIPMask return true when a ISD::VECTOR_SHUFFLE's mask
+ // represents the same logical operation as performed by a ZIP instruction. In
+ // isolation these functions do not mean the ISD::VECTOR_SHUFFLE is exactly
+ // equivalent to an AArch64 instruction. There's the extra component of
+ // ISD::VECTOR_SHUFFLE's value type to consider. Prior to SVE these functions
+ // only operated on 64/128bit vector types that have a direct mapping to a
+ // target register and so an exact mapping is implied.
+ // However, when using SVE for fixed length vectors, most legal vector types
+ // are actually sub-vectors of a larger SVE register. When mapping
+ // ISD::VECTOR_SHUFFLE to an SVE instruction care must be taken to consider
+ // how the mask's indices translate. Specifically, when the mapping requires
+ // an exact meaning for a specific vector index (e.g. Index X is the last
+ // vector element in the register) then such mappings are often only safe when
+ // the exact SVE register size is know. The main exception to this is when
+ // indices are logically relative to the first element of either
+ // ISD::VECTOR_SHUFFLE operand because these relative indices don't change
+ // when converting from fixed-length to scalable vector types (i.e. the start
+ // of a fixed length vector is always the start of a scalable vector).
+ unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
+ unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
+ if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
+ if (ShuffleVectorInst::isReverseMask(ShuffleMask) && Op2.isUndef()) {
+ Op = DAG.getNode(ISD::VECTOR_REVERSE, DL, ContainerVT, Op1);
+ return convertFromScalableVector(DAG, VT, Op);
+ }
+
+ if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));
+
+ if (isUZPMask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
+ }
+
+ if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op1));
+
+ if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+ return convertFromScalableVector(
+ DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ea884cdccd28..367ba3039a0c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -324,6 +324,8 @@ enum NodeType : unsigned {
BITREVERSE_MERGE_PASSTHRU,
BSWAP_MERGE_PASSTHRU,
+ REVH_MERGE_PASSTHRU,
+ REVW_MERGE_PASSTHRU,
CTLZ_MERGE_PASSTHRU,
CTPOP_MERGE_PASSTHRU,
DUP_MERGE_PASSTHRU,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index cd4bc8a61a8a..f8d492188744 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -387,16 +387,16 @@ def simm7s16 : Operand<i32> {
let PrintMethod = "printImmScale<16>";
}
-def am_sve_fi : ComplexPattern<i64, 2, "SelectAddrModeFrameIndexSVE", []>;
+def am_sve_fi : ComplexPattern<iPTR, 2, "SelectAddrModeFrameIndexSVE", []>;
-def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
-def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
-def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
-def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
-def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
+def am_indexed7s8 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed7S8", []>;
+def am_indexed7s16 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed7S16", []>;
+def am_indexed7s32 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed7S32", []>;
+def am_indexed7s64 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed7S64", []>;
+def am_indexed7s128 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed7S128", []>;
-def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
-def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
+def am_indexedu6s128 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedU6S128", []>;
+def am_indexeds9s128 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedS9S128", []>;
def UImmS1XForm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64);
@@ -3177,18 +3177,18 @@ def maski16_or_more : Operand<i32>,
// (unsigned immediate)
// Indexed for 8-bit registers. offset is in range [0,4095].
-def am_indexed8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed8", []>;
-def am_indexed16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed16", []>;
-def am_indexed32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []>;
-def am_indexed64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []>;
-def am_indexed128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []>;
+def am_indexed8 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed8", []>;
+def am_indexed16 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed16", []>;
+def am_indexed32 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed32", []>;
+def am_indexed64 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed64", []>;
+def am_indexed128 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexed128", []>;
// (unsigned immediate)
// Indexed for 8-bit registers. offset is in range [0,63].
-def am_indexed8_6b : ComplexPattern<i64, 2, "SelectAddrModeIndexedUImm<1,63>", []>;
-def am_indexed16_6b : ComplexPattern<i64, 2, "SelectAddrModeIndexedUImm<2,63>", []>;
-def am_indexed32_6b : ComplexPattern<i64, 2, "SelectAddrModeIndexedUImm<4,63>", []>;
-def am_indexed64_6b : ComplexPattern<i64, 2, "SelectAddrModeIndexedUImm<8,63>", []>;
+def am_indexed8_6b : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedUImm<1,63>", []>;
+def am_indexed16_6b : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedUImm<2,63>", []>;
+def am_indexed32_6b : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedUImm<4,63>", []>;
+def am_indexed64_6b : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedUImm<8,63>", []>;
def gi_am_indexed8 :
GIComplexOperandMatcher<s64, "selectAddrModeIndexed<8>">,
@@ -3358,11 +3358,11 @@ class PrefetchLiteral<bits<2> opc, bit V, string asm, list<dag> pat>
// Load/store register offset
//---
-def ro_Xindexed8 : ComplexPattern<i64, 4, "SelectAddrModeXRO<8>", []>;
-def ro_Xindexed16 : ComplexPattern<i64, 4, "SelectAddrModeXRO<16>", []>;
-def ro_Xindexed32 : ComplexPattern<i64, 4, "SelectAddrModeXRO<32>", []>;
-def ro_Xindexed64 : ComplexPattern<i64, 4, "SelectAddrModeXRO<64>", []>;
-def ro_Xindexed128 : ComplexPattern<i64, 4, "SelectAddrModeXRO<128>", []>;
+def ro_Xindexed8 : ComplexPattern<iPTR, 4, "SelectAddrModeXRO<8>", []>;
+def ro_Xindexed16 : ComplexPattern<iPTR, 4, "SelectAddrModeXRO<16>", []>;
+def ro_Xindexed32 : ComplexPattern<iPTR, 4, "SelectAddrModeXRO<32>", []>;
+def ro_Xindexed64 : ComplexPattern<iPTR, 4, "SelectAddrModeXRO<64>", []>;
+def ro_Xindexed128 : ComplexPattern<iPTR, 4, "SelectAddrModeXRO<128>", []>;
def gi_ro_Xindexed8 :
GIComplexOperandMatcher<s64, "selectAddrModeXRO<8>">,
@@ -3380,11 +3380,11 @@ def gi_ro_Xindexed128 :
GIComplexOperandMatcher<s64, "selectAddrModeXRO<128>">,
GIComplexPatternEquiv<ro_Xindexed128>;
-def ro_Windexed8 : ComplexPattern<i64, 4, "SelectAddrModeWRO<8>", []>;
-def ro_Windexed16 : ComplexPattern<i64, 4, "SelectAddrModeWRO<16>", []>;
-def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
-def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
-def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
+def ro_Windexed8 : ComplexPattern<iPTR, 4, "SelectAddrModeWRO<8>", []>;
+def ro_Windexed16 : ComplexPattern<iPTR, 4, "SelectAddrModeWRO<16>", []>;
+def ro_Windexed32 : ComplexPattern<iPTR, 4, "SelectAddrModeWRO<32>", []>;
+def ro_Windexed64 : ComplexPattern<iPTR, 4, "SelectAddrModeWRO<64>", []>;
+def ro_Windexed128 : ComplexPattern<iPTR, 4, "SelectAddrModeWRO<128>", []>;
def gi_ro_Windexed8 :
GIComplexOperandMatcher<s64, "selectAddrModeWRO<8>">,
@@ -3880,11 +3880,11 @@ multiclass PrefetchRO<bits<2> sz, bit V, bits<2> opc, string asm> {
// Load/store unscaled immediate
//---
-def am_unscaled8 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled8", []>;
-def am_unscaled16 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled16", []>;
-def am_unscaled32 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>;
-def am_unscaled64 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>;
-def am_unscaled128 :ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>;
+def am_unscaled8 : ComplexPattern<iPTR, 2, "SelectAddrModeUnscaled8", []>;
+def am_unscaled16 : ComplexPattern<iPTR, 2, "SelectAddrModeUnscaled16", []>;
+def am_unscaled32 : ComplexPattern<iPTR, 2, "SelectAddrModeUnscaled32", []>;
+def am_unscaled64 : ComplexPattern<iPTR, 2, "SelectAddrModeUnscaled64", []>;
+def am_unscaled128 :ComplexPattern<iPTR, 2, "SelectAddrModeUnscaled128", []>;
def gi_am_unscaled8 :
GIComplexOperandMatcher<s64, "selectAddrModeUnscaled8">,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index f8f8ee3f1e6c..5fc5e4e5eb35 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7055,6 +7055,8 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
+ if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags))
+ return false;
// Check if LR is available through all of the MBB. If it's not, then set
// a flag.
assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index decee117d2d5..ebccc07edc7a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4174,6 +4174,21 @@ defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
+def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
+ (CMLTv8i8rz V64:$Rn)>;
+def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
+ (CMLTv4i16rz V64:$Rn)>;
+def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
+ (CMLTv2i32rz V64:$Rn)>;
+def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
+ (CMLTv16i8rz V128:$Rn)>;
+def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
+ (CMLTv8i16rz V128:$Rn)>;
+def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
+ (CMLTv4i32rz V128:$Rn)>;
+def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
+ (CMLTv2i64rz V128:$Rn)>;
+
defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
@@ -4363,6 +4378,32 @@ def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
(v4i32 VImm8000)))),
(SQXTNv4i16 V128:$Vn)>;
+// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
+// with reversed min/max
+def : Pat<(v16i8 (concat_vectors
+ (v8i8 V64:$Vd),
+ (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
+ (v8i16 VImm7F)))))),
+ (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+def : Pat<(v16i8 (concat_vectors
+ (v8i8 V64:$Vd),
+ (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
+ (v8i16 VImm80)))))),
+ (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+
+// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
+// with reversed min/max
+def : Pat<(v8i16 (concat_vectors
+ (v4i16 V64:$Vd),
+ (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
+ (v4i32 VImm7FFF)))))),
+ (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+def : Pat<(v8i16 (concat_vectors
+ (v4i16 V64:$Vd),
+ (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
+ (v4i32 VImm8000)))))),
+ (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//
@@ -4825,6 +4866,9 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
+def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
+ (CMLTv1i64rz V64:$Rn)>;
+
def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
(FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
@@ -5288,6 +5332,29 @@ defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
+def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
+ (v8i8 (trunc (v8i16 V128:$Vm))))),
+ (UZP1v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
+ (v4i16 (trunc (v4i32 V128:$Vm))))),
+ (UZP1v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
+ (v2i32 (trunc (v2i64 V128:$Vm))))),
+ (UZP1v4i32 V128:$Vn, V128:$Vm)>;
+
+def : Pat<(v16i8 (concat_vectors
+ (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
+ (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
+ (UZP2v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors
+ (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
+ (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
+ (UZP2v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors
+ (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
+ (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
+ (UZP2v4i32 V128:$Vn, V128:$Vm)>;
+
//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------
@@ -6536,6 +6603,34 @@ defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
+// RADDHN patterns for when RSHRN shifts by half the size of the vector element
+def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
+ (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
+ (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
+ (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
+
+// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
+def : Pat<(v16i8 (concat_vectors
+ (v8i8 V64:$Vd),
+ (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
+ (RADDHNv8i16_v16i8
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+ (v8i16 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v8i16 (concat_vectors
+ (v4i16 V64:$Vd),
+ (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
+ (RADDHNv4i32_v8i16
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+ (v4i32 (MOVIv2d_ns (i32 0))))>;
+def : Pat<(v4i32 (concat_vectors
+ (v2i32 V64:$Vd),
+ (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
+ (RADDHNv2i64_v4i32
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
+ (v2i64 (MOVIv2d_ns (i32 0))))>;
+
// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 25d53f4ab065..eb55a472a69a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -136,15 +136,15 @@ def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER
//
// SVE CNT/INC/RDVL
-def sve_rdvl_imm : ComplexPattern<i32, 1, "SelectRDVLImm<-32, 31, 16>">;
-def sve_cnth_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 8>">;
-def sve_cntw_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 4>">;
-def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;
+def sve_rdvl_imm : ComplexPattern<i64, 1, "SelectRDVLImm<-32, 31, 16>">;
+def sve_cnth_imm : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, 8>">;
+def sve_cntw_imm : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, 4>">;
+def sve_cntd_imm : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, 2>">;
// SVE DEC
-def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -8>">;
-def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -4>">;
-def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -2>">;
+def sve_cnth_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -8>">;
+def sve_cntw_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -4>">;
+def sve_cntd_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -2>">;
def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>;
@@ -231,6 +231,8 @@ def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Ari
def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>;
// These are like the above but we don't yet have need for ISD nodes. They allow
// a single pattern to match intrinsic and ISD operand layouts.
@@ -275,6 +277,11 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
return N->hasOneUse();
}]>;
+def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
+ (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
+ return N->getFlags().hasNoSignedZeros();
+}]>;
+
def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
@@ -536,7 +543,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
(!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
// Zd = -(Za + Zn * Zm)
- def : Pat<(AArch64fneg_mt PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
+ // (with nsz neg.)
+ def : Pat<(AArch64fneg_mt_nsz PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
(!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
// Zda = Zda + Zn * Zm
@@ -624,13 +632,13 @@ let Predicates = [HasSVEorStreamingSVE] in {
def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
// Duplicate Int immediate into all vector elements
- def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+ def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))),
(DUP_ZI_B $a, $b)>;
- def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+ def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))),
(DUP_ZI_H $a, $b)>;
- def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+ def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))),
(DUP_ZI_S $a, $b)>;
- def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),
+ def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm64 i32:$a, i32:$b)))),
(DUP_ZI_D $a, $b)>;
// Duplicate immediate FP into all vector elements.
@@ -674,8 +682,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>;
defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>;
- defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
- defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
+ defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", AArch64revh_mt>;
+ defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", AArch64revw_mt>;
defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>;
@@ -2686,13 +2694,13 @@ let Predicates = [HasSVEorStreamingSVE] in {
// Splice with lane bigger or equal to 0
def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_255:$index)>;
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_127:$index)>;
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_63:$index)>;
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_31:$index)>;
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
} // End HasSVEorStreamingSVE
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 5cec4cb66339..566c7a16db23 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -488,7 +488,7 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
const Align NewAlignment =
- max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
+ max(MaybeAlign(Info.AI->getAlign()), kTagGranuleSize);
Info.AI->setAlignment(NewAlignment);
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
@@ -537,15 +537,14 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
SmallVector<Instruction *, 4> UnrecognizedLifetimes;
for (auto &BB : *F) {
- for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
- Instruction *I = &*IT;
- if (auto *AI = dyn_cast<AllocaInst>(I)) {
+ for (Instruction &I : BB) {
+ if (auto *AI = dyn_cast<AllocaInst>(&I)) {
Allocas[AI].AI = AI;
Allocas[AI].OldAI = AI;
continue;
}
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
for (Value *V : DVI->location_ops())
if (auto *AI = dyn_cast_or_null<AllocaInst>(V))
if (Allocas[AI].DbgVariableIntrinsics.empty() ||
@@ -554,12 +553,12 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
continue;
}
- auto *II = dyn_cast<IntrinsicInst>(I);
+ auto *II = dyn_cast<IntrinsicInst>(&I);
if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end)) {
AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
if (!AI) {
- UnrecognizedLifetimes.push_back(I);
+ UnrecognizedLifetimes.push_back(&I);
continue;
}
if (II->getIntrinsicID() == Intrinsic::lifetime_start)
@@ -568,8 +567,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
Allocas[AI].LifetimeEnd.push_back(II);
}
- if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
- RetVec.push_back(I);
+ if (isa<ReturnInst, ResumeInst, CleanupReturnInst>(&I))
+ RetVec.push_back(&I);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index d782d6352cbe..f7d3dd0bc222 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -346,9 +346,7 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
return false;
if (TargetTriple.isiOS()) {
- unsigned Major, Minor, Micro;
- TargetTriple.getiOSVersion(Major, Minor, Micro);
- return Major >= 8;
+ return TargetTriple.getiOSVersion() >= VersionTuple(8);
}
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 19db774ccd7b..b3cd5ebd5f65 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -116,6 +116,8 @@ protected:
bool HasFP16FML = false;
bool HasSPE = false;
+ bool FixCortexA53_835769 = false;
+
// ARMv8.1 extensions
bool HasVH = false;
bool HasPAN = false;
@@ -571,6 +573,8 @@ public:
bool hasEL2VMSA() const { return HasEL2VMSA; }
bool hasEL3() const { return HasEL3; }
+ bool fixCortexA53_835769() const { return FixCortexA53_835769; }
+
bool addrSinkUsingGEPs() const override {
// Keeping GEPs inbounds is important for exploiting AArch64
// addressing-modes in ILP32 mode.
@@ -632,8 +636,7 @@ public:
// extended frames should be flagged as present.
const Triple &TT = getTargetTriple();
- unsigned Major, Minor, Micro;
- TT.getOSVersion(Major, Minor, Micro);
+ unsigned Major = TT.getOSVersion().getMajor();
switch(TT.getOS()) {
default:
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index ce26c62af61a..4af28fc070dd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -117,11 +117,6 @@ static cl::opt<bool>
cl::init(true), cl::Hidden);
static cl::opt<bool>
-EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
- cl::desc("Work around Cortex-A53 erratum 835769"),
- cl::init(false));
-
-static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
cl::init(false));
@@ -382,10 +377,9 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
unsigned MaxSVEVectorSize = 0;
Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
if (VScaleRangeAttr.isValid()) {
- std::tie(MinSVEVectorSize, MaxSVEVectorSize) =
- VScaleRangeAttr.getVScaleRangeArgs();
- MinSVEVectorSize *= 128;
- MaxSVEVectorSize *= 128;
+ Optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
+ MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128;
+ MaxSVEVectorSize = VScaleMax ? VScaleMax.getValue() * 128 : 0;
} else {
MinSVEVectorSize = SVEVectorBitsMinOpt;
MaxSVEVectorSize = SVEVectorBitsMaxOpt;
@@ -765,8 +759,7 @@ void AArch64PassConfig::addPreEmitPass() {
if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
- if (EnableA53Fix835769)
- addPass(createAArch64A53Fix835769());
+ addPass(createAArch64A53Fix835769());
if (EnableBranchTargets)
addPass(createAArch64BranchTargetsPass());
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 34015d2dbd49..d21854e38f5a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -30,6 +30,12 @@ using namespace llvm::PatternMatch;
static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10),
+ cl::Hidden);
+
+static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead",
+ cl::init(10), cl::Hidden);
+
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -725,6 +731,22 @@ static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
return IC.replaceInstUsesWith(II, FMLA);
}
+static bool isAllActivePredicate(Value *Pred) {
+ // Look through convert.from.svbool(convert.to.svbool(...) chain.
+ Value *UncastedPred;
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
+ m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+ m_Value(UncastedPred)))))
+ // If the predicate has the same or less lanes than the uncasted
+ // predicate then we know the casting has no effect.
+ if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
+ cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
+ Pred = UncastedPred;
+
+ return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>()));
+}
+
static Optional<Instruction *>
instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
IRBuilder<> Builder(II.getContext());
@@ -735,8 +757,7 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
Type *VecTy = II.getType();
Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTy->getPointerTo());
- if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
- m_ConstantInt<AArch64SVEPredPattern::all>()))) {
+ if (isAllActivePredicate(Pred)) {
LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
return IC.replaceInstUsesWith(II, Load);
}
@@ -758,8 +779,7 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
Value *VecPtr =
Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());
- if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
- m_ConstantInt<AArch64SVEPredPattern::all>()))) {
+ if (isAllActivePredicate(Pred)) {
Builder.CreateStore(VecOp, VecPtr);
return IC.eraseInstFromFunction(II);
}
@@ -1008,6 +1028,40 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
return None;
}
+static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
+ IntrinsicInst &II) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ Type *Int32Ty = Builder.getInt32Ty();
+ Value *Pred = II.getOperand(0);
+ Value *Vec = II.getOperand(1);
+ Value *DivVec = II.getOperand(2);
+
+ Value *SplatValue = getSplatValue(DivVec);
+ ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
+ if (!SplatConstantInt)
+ return None;
+ APInt Divisor = SplatConstantInt->getValue();
+
+ if (Divisor.isPowerOf2()) {
+ Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());
+ auto ASRD = Builder.CreateIntrinsic(
+ Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
+ return IC.replaceInstUsesWith(II, ASRD);
+ }
+ if (Divisor.isNegatedPowerOf2()) {
+ Divisor.negate();
+ Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());
+ auto ASRD = Builder.CreateIntrinsic(
+ Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
+ auto NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,
+ {ASRD->getType()}, {ASRD, Pred, ASRD});
+ return IC.replaceInstUsesWith(II, NEG);
+ }
+
+ return None;
+}
+
Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -1068,6 +1122,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVELD1(IC, II, DL);
case Intrinsic::aarch64_sve_st1:
return instCombineSVEST1(IC, II, DL);
+ case Intrinsic::aarch64_sve_sdiv:
+ return instCombineSVESDIV(IC, II);
}
return None;
@@ -1746,7 +1802,7 @@ InstructionCost
AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) {
- if (!isa<ScalableVectorType>(Src))
+ if (useNeonVector(Src))
return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
auto LT = TLI->getTypeLegalizationCost(DL, Src);
@@ -1763,6 +1819,10 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first * 2;
}
+static unsigned getSVEGatherScatterOverhead(unsigned Opcode) {
+ return Opcode == Instruction::Load ? SVEGatherOverhead : SVEScatterOverhead;
+}
+
InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
@@ -1785,6 +1845,10 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
ElementCount LegalVF = LT.second.getVectorElementCount();
InstructionCost MemOpCost =
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
+ // Add on an overhead cost for using gathers/scatters.
+ // TODO: At the moment this is applied unilaterally for all CPUs, but at some
+ // point we may want a per-CPU overhead.
+ MemOpCost *= getSVEGatherScatterOverhead(Opcode);
return LT.first * MemOpCost * getMaxNumElements(LegalVF);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index d1e8cd204b3a..c3e1735cd4cd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -309,6 +309,8 @@ public:
bool supportsScalableVectors() const { return ST->hasSVE(); }
+ bool enableScalableVectorization() const { return ST->hasSVE(); }
+
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 6d3aea2721de..62038b10fccd 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1031,12 +1031,7 @@ public:
if (DarwinRefKind != MCSymbolRefExpr::VK_None)
return false;
- for (unsigned i = 0; i != AllowedModifiers.size(); ++i) {
- if (ELFRefKind == AllowedModifiers[i])
- return true;
- }
-
- return false;
+ return llvm::is_contained(AllowedModifiers, ELFRefKind);
}
bool isMovWSymbolG3() const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1524aa5eb0ec..e8894e7933d6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -785,6 +785,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor({s128})
.minScalar(0, MinFPScalar);
+ // TODO: Vector types.
+ getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
+ .legalFor({MinFPScalar, s32, s64})
+ .minScalar(0, MinFPScalar);
+
// TODO: Libcall support for s128.
// TODO: s16 should be legal with full FP16 support.
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index a9b3792e0118..3dec980a819a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -289,6 +289,44 @@ static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
Observer.changedInstr(MI);
}
+/// Match a 128b store of zero and split it into two 64 bit stores, for
+/// size/performance reasons.
+static bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ GStore &Store = cast<GStore>(MI);
+ if (!Store.isSimple())
+ return false;
+ LLT ValTy = MRI.getType(Store.getValueReg());
+ if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
+ return false;
+ if (ValTy.getSizeInBits() != Store.getMemSizeInBits())
+ return false; // Don't split truncating stores.
+ if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
+ return false;
+ auto MaybeCst = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(Store.getValueReg()), MRI);
+ return MaybeCst && MaybeCst->isZero();
+}
+
+static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer) {
+ B.setInstrAndDebugLoc(MI);
+ GStore &Store = cast<GStore>(MI);
+ assert(MRI.getType(Store.getValueReg()).isVector() &&
+ "Expected a vector store value");
+ LLT NewTy = LLT::scalar(64);
+ Register PtrReg = Store.getPointerReg();
+ auto Zero = B.buildConstant(NewTy, 0);
+ auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
+ B.buildConstant(LLT::scalar(64), 8));
+ auto &MF = *MI.getMF();
+ auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
+ auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
+ B.buildStore(Zero, PtrReg, *LowMMO);
+ B.buildStore(Zero, HighPtr, *HighMMO);
+ Store.eraseFromParent();
+}
+
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 40ddf6a94f73..515a5c63a559 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -430,6 +430,8 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_INTRINSIC_ROUND:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUM:
return true;
}
return false;
@@ -600,6 +602,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMINIMUM:
return getSameKindOfOperandsMapping(MI);
case TargetOpcode::G_FPEXT: {
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 90688f1a3e83..c1186ae804d2 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -239,8 +239,8 @@ void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
{codeview::RegisterId::ARM64_Q31, AArch64::Q31},
};
- for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
- MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
+ for (const auto &I : RegMap)
+ MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg));
}
static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 010ffa1502de..bb488cd7da32 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -197,34 +197,42 @@ def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64
def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8>", []>;
def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", []>;
def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
-def SVEAddSubImm64Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
+def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
-def SVELogicalImm8Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i8>", []>;
-def SVELogicalImm16Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16>", []>;
-def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
+def SVELogicalImm8Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8>", []>;
+def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
-def SVELogicalImm8NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
-def SVELogicalImm16NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
-def SVELogicalImm32NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
+def SVELogicalImm8NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
+def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
+def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
def SVELogicalImm64NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64, true>", []>;
-def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
+def SVE8BitLslImm32 : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
+def SVE8BitLslImm64 : ComplexPattern<i64, 2, "SelectSVE8BitLslImm", [imm]>;
+class SVE8BitLslImm<ValueType ty> {
+ ComplexPattern Pat = !cond(
+ !eq(ty, i32): SVE8BitLslImm32,
+ !eq(ty, i64): SVE8BitLslImm64);
+}
def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>;
def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>;
-def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>;
-def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
+def SVEArithUImm64Pat : ComplexPattern<i64, 1, "SelectSVEArithImm<MVT::i64>", []>;
+
+def SVEArithSImmPat32 : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
+def SVEArithSImmPat64 : ComplexPattern<i64, 1, "SelectSVESignedArithImm", []>;
def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>;
def SVEShiftImmL16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 15>", []>;
def SVEShiftImmL32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 31>", []>;
-def SVEShiftImmL64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 63>", []>;
+def SVEShiftImmL64 : ComplexPattern<i64, 1, "SelectSVEShiftImm<0, 63>", []>;
def SVEShiftImmR8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 8, true>", []>;
def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []>;
def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
-def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>;
+def SVEShiftImmR64 : ComplexPattern<i64, 1, "SelectSVEShiftImm<1, 64, true>", []>;
def SVEAllActive : ComplexPattern<untyped, 0, "SelectAllActivePredicate", []>;
@@ -260,14 +268,14 @@ def sve_incdec_imm : Operand<i32>, TImmLeaf<i32, [{
}
// This allows i32 immediate extraction from i64 based arithmetic.
-def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
-def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;
-
+def sve_cnt_mul_imm_i32 : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
+def sve_cnt_mul_imm_i64 : ComplexPattern<i64, 1, "SelectCntImm<1, 16, 1, false>">;
+def sve_cnt_shl_imm : ComplexPattern<i64, 1, "SelectCntImm<1, 16, 1, true>">;
-def sve_ext_imm_0_31 : ComplexPattern<i32, 1, "SelectEXTImm<31, 8>">;
-def sve_ext_imm_0_63 : ComplexPattern<i32, 1, "SelectEXTImm<63, 4>">;
-def sve_ext_imm_0_127 : ComplexPattern<i32, 1, "SelectEXTImm<127, 2>">;
-def sve_ext_imm_0_255 : ComplexPattern<i32, 1, "SelectEXTImm<255, 1>">;
+def sve_ext_imm_0_31 : ComplexPattern<i64, 1, "SelectEXTImm<31, 8>">;
+def sve_ext_imm_0_63 : ComplexPattern<i64, 1, "SelectEXTImm<63, 4>">;
+def sve_ext_imm_0_127 : ComplexPattern<i64, 1, "SelectEXTImm<127, 2>">;
+def sve_ext_imm_0_255 : ComplexPattern<i64, 1, "SelectEXTImm<255, 1>">;
def int_aarch64_sve_cntp_oneuse : PatFrag<(ops node:$pred, node:$src2),
(int_aarch64_sve_cntp node:$pred, node:$src2), [{
@@ -435,8 +443,8 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
-def SVEDup0 : ComplexPattern<i64, 0, "SelectDupZero", []>;
-def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
+def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
+def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
let AddedComplexity = 1 in {
class SVE_3_Op_Pat_SelZero<ValueType vtd, SDPatternOperator op, ValueType vt1,
@@ -868,10 +876,10 @@ multiclass sve_int_count<bits<3> opc, string asm, SDPatternOperator op> {
def : InstAlias<asm # "\t$Rd",
(!cast<Instruction>(NAME) GPR64:$Rd, 0b11111, 1), 2>;
- def : Pat<(i64 (mul (op sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm))),
+ def : Pat<(i64 (mul (op sve_pred_enum:$pattern), (sve_cnt_mul_imm_i64 i32:$imm))),
(!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
- def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm)))),
+ def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (sve_cnt_shl_imm i32:$imm))),
(!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
def : Pat<(i64 (op sve_pred_enum:$pattern)),
@@ -951,10 +959,10 @@ multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))),
(!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>;
- def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm)))),
+ def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm_i64 i32:$imm)))),
(!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
- def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (sve_cnt_shl_imm i32:$imm)))),
(!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))),
@@ -962,12 +970,12 @@ multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, 1),
sub_32))>;
- def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm i32:$imm)))),
+ def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm_i32 i32:$imm)))),
(i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
sub_32))>;
- def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_shl_imm i32:$imm)))),
(i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
sub_32))>;
@@ -4324,10 +4332,10 @@ multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4348,10 +4356,10 @@ multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_All_Active<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4542,7 +4550,7 @@ multiclass sve_int_dup_imm_pred_merge_inst<
(!cast<Instruction>(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>;
def : Pat<(intty
(vselect predty:$Pg,
- (intty (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift)))),
+ (intty (AArch64dup (scalarty (SVE8BitLslImm<scalarty>.Pat i32:$imm, i32:$shift)))),
intty:$Zd)),
(!cast<Instruction>(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>;
}
@@ -4580,7 +4588,7 @@ multiclass sve_int_dup_imm_pred_zero_inst<
(!cast<Instruction>(NAME) PPRAny:$Ps1, 1, 0)>;
def : Pat<(intty
(vselect predty:$Pg,
- (intty (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift)))),
+ (intty (AArch64dup (scalarty (SVE8BitLslImm<scalarty>.Pat i32:$imm, i32:$shift)))),
(intty (AArch64dup (scalarty 0))))),
(!cast<Instruction>(NAME) $Pg, i32:$imm, i32:$shift)>;
}
@@ -6476,14 +6484,14 @@ multiclass sve_int_perm_rev_revh<string asm, SDPatternOperator op> {
def _S : sve_int_perm_rev<0b10, 0b01, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b01, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_perm_rev_revw<string asm, SDPatternOperator op> {
def _D : sve_int_perm_rev<0b11, 0b10, asm, ZPR64>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -8377,13 +8385,13 @@ multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatter
}
/// Addressing modes
-def am_sve_indexed_s4 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-8,7>", [], [SDNPWantRoot]>;
-def am_sve_indexed_s6 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-32,31>", [], [SDNPWantRoot]>;
+def am_sve_indexed_s4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<-8,7>", [], [SDNPWantRoot]>;
+def am_sve_indexed_s6 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<-32,31>", [], [SDNPWantRoot]>;
-def am_sve_regreg_lsl0 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<0>", []>;
-def am_sve_regreg_lsl1 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<1>", []>;
-def am_sve_regreg_lsl2 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<2>", []>;
-def am_sve_regreg_lsl3 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<3>", []>;
+def am_sve_regreg_lsl0 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<0>", []>;
+def am_sve_regreg_lsl1 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<1>", []>;
+def am_sve_regreg_lsl2 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<2>", []>;
+def am_sve_regreg_lsl3 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<3>", []>;
// Predicated pseudo floating point two operand instructions.
multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index e72dccdc4b78..642080a0d40d 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -152,7 +152,7 @@ bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
// Remove the most encompassing ptrue, as well as any promoted ptrues, leaving
// behind only the ptrues to be coalesced.
PTrues.remove(MostEncompassingPTrue);
- PTrues.remove_if([](auto *PTrue) { return isPTruePromoted(PTrue); });
+ PTrues.remove_if(isPTruePromoted);
// Hoist MostEncompassingPTrue to the start of the basic block. It is always
// safe to do this, since ptrue intrinsic calls are guaranteed to have no
@@ -287,10 +287,10 @@ bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
if (!Attr.isValid())
return false;
- unsigned MinVScale, MaxVScale;
- std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ unsigned MinVScale = Attr.getVScaleRangeMin();
+ Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
// The transform needs to know the exact runtime length of scalable vectors
- if (MinVScale != MaxVScale || MinVScale == 0)
+ if (!MaxVScale || MinVScale != MaxVScale)
return false;
auto *PredType =
@@ -351,10 +351,10 @@ bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
if (!Attr.isValid())
return false;
- unsigned MinVScale, MaxVScale;
- std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ unsigned MinVScale = Attr.getVScaleRangeMin();
+ Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
// The transform needs to know the exact runtime length of scalable vectors
- if (MinVScale != MaxVScale || MinVScale == 0)
+ if (!MaxVScale || MinVScale != MaxVScale)
return false;
auto *PredType =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index aab76d27ef11..d28f38e42430 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -173,14 +173,7 @@ constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
const AMDGPUFunctionArgInfo &
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
auto I = ArgInfoMap.find(&F);
- if (I == ArgInfoMap.end()) {
- if (AMDGPUTargetMachine::EnableFixedFunctionABI)
- return FixedABIFunctionInfo;
-
- // Without the fixed ABI, we assume no function has special inputs.
- assert(F.isDeclaration());
- return ExternFunctionInfo;
- }
-
+ if (I == ArgInfoMap.end())
+ return FixedABIFunctionInfo;
return I->second;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index f0aadab3302f..b4ebc7d7d75f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -112,6 +112,17 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
+/// Returns true if the function requires the implicit argument be passed
+/// regardless of the function contents.
+static bool funcRequiresImplicitArgPtr(const Function &F) {
+ // Sanitizers require the hostcall buffer passed in the implicit arguments.
+ return F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeThread) ||
+ F.hasFnAttribute(Attribute::SanitizeMemory) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeMemTag);
+}
+
namespace {
class AMDGPUInformationCache : public InformationCache {
public:
@@ -296,7 +307,7 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
bool AllCallSitesKnown = true;
if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
- indicatePessimisticFixpoint();
+ return indicatePessimisticFixpoint();
return Change;
}
@@ -339,7 +350,17 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
+
+ // If the function requires the implicit arg pointer due to sanitizers,
+ // assume it's needed even if explicitly marked as not requiring it.
+ const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
+ if (NeedsImplicit)
+ removeAssumedBits(IMPLICIT_ARG_PTR);
+
for (auto Attr : ImplicitAttrs) {
+ if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
+ continue;
+
if (F->hasFnAttribute(Attr.second))
addKnownBits(Attr.first);
}
@@ -500,6 +521,9 @@ struct AAAMDFlatWorkGroupSize
std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
intersectKnown(
ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
+
+ if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
+ indicatePessimisticFixpoint();
}
ChangeStatus updateImpl(Attributor &A) override {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 43928d7c2a09..2f1e7823f65c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -652,8 +652,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(
++PSInputNum;
if (SkipArg) {
- for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
- B.buildUndef(VRegs[Idx][I]);
+ for (Register R : VRegs[Idx])
+ B.buildUndef(R);
++Idx;
continue;
@@ -715,10 +715,9 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if (!MBB.empty())
B.setInstr(*MBB.begin());
- if (!IsEntryFunc) {
+ if (!IsEntryFunc && !IsGraphics) {
// For the fixed ABI, pass workitem IDs in the last argument register.
- if (AMDGPUTargetMachine::EnableFixedFunctionABI)
- TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
+ TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
IncomingValueAssigner Assigner(AssignFn);
@@ -731,11 +730,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
uint64_t StackOffset = Assigner.StackOffset;
- if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
- // Special inputs come after user arguments.
- TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
- }
-
// Start adding system SGPRs.
if (IsEntryFunc) {
TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
@@ -829,9 +823,12 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
if (IncomingArg) {
LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
- } else {
- assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
+ } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
+ } else {
+ // We may have proven the input wasn't needed, although the ABI is
+ // requiring it. We just need to allocate the register appropriately.
+ MIRBuilder.buildUndef(InputReg);
}
if (OutgoingArg->isRegister()) {
@@ -1233,8 +1230,7 @@ bool AMDGPUCallLowering::lowerTailCall(
// after the ordinary user argument registers.
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
- if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
- Info.CallConv != CallingConv::AMDGPU_Gfx) {
+ if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
return false;
@@ -1300,12 +1296,6 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
const DataLayout &DL = F.getParent()->getDataLayout();
- if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
- Info.CallConv != CallingConv::AMDGPU_Gfx) {
- LLVM_DEBUG(dbgs() << "Variable function ABI not implemented\n");
- return false;
- }
-
SmallVector<ArgInfo, 8> OutArgs;
for (auto &OrigArg : Info.OrigArgs)
splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
@@ -1359,8 +1349,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// after the ordinary user argument registers.
SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
- if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
- Info.CallConv != CallingConv::AMDGPU_Gfx) {
+ if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index c7c5ff7bcbe7..2415fdfecaae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -64,6 +64,30 @@ def int_minmax_to_med3 : GICombineRule<
[{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
(apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+def fp_minmax_to_med3 : GICombineRule<
+ (defs root:$min_or_max, med3_matchdata:$matchinfo),
+ (match (wip_match_opcode G_FMAXNUM,
+ G_FMINNUM,
+ G_FMAXNUM_IEEE,
+ G_FMINNUM_IEEE):$min_or_max,
+ [{ return RegBankHelper.matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+
+def fp_minmax_to_clamp : GICombineRule<
+ (defs root:$min_or_max, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FMAXNUM,
+ G_FMINNUM,
+ G_FMAXNUM_IEEE,
+ G_FMINNUM_IEEE):$min_or_max,
+ [{ return RegBankHelper.matchFPMinMaxToClamp(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ RegBankHelper.applyClamp(*${min_or_max}, ${matchinfo}); }])>;
+
+def fmed3_intrinsic_to_clamp : GICombineRule<
+ (defs root:$fmed3, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_INTRINSIC):$fmed3,
+ [{ return RegBankHelper.matchFPMed3ToClamp(*${fmed3}, ${matchinfo}); }]),
+ (apply [{ RegBankHelper.applyClamp(*${fmed3}, ${matchinfo}); }])>;
+
def remove_fcanonicalize_matchinfo : GIDefMatchData<"Register">;
def remove_fcanonicalize : GICombineRule<
@@ -102,7 +126,9 @@ def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
}
def AMDGPURegBankCombinerHelper : GICombinerHelper<
- "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain]> {
+ "AMDGPUGenRegBankCombinerHelper",
+ [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
+ fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
let StateClass = "AMDGPURegBankCombinerHelperState";
let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index 301e6f6d6f42..e79ff9b597c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -378,5 +378,4 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
}
MI.eraseFromParent();
- return;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 12cef2774aaf..7fd94a977be7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -172,6 +172,8 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
+def : GINodeEquiv<G_AMDGPU_FMED3, AMDGPUfmed3_impl>;
+def : GINodeEquiv<G_AMDGPU_CLAMP, AMDGPUclamp>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index b9c59f4c615a..699c6c479455 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -280,11 +280,12 @@ void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
}
}
-void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitKernelArgs(const Function &Func,
+ const GCNSubtarget &ST) {
for (auto &Arg : Func.args())
emitKernelArg(Arg);
- emitHiddenKernelArgs(Func);
+ emitHiddenKernelArgs(Func, ST);
}
void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
@@ -381,10 +382,9 @@ void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
}
}
-void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
- int HiddenArgNumBytes =
- getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
-
+void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
+ const GCNSubtarget &ST) {
+ unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
return;
@@ -465,11 +465,12 @@ void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
HSAMetadata.mKernels.push_back(Kernel::Metadata());
auto &Kernel = HSAMetadata.mKernels.back();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Kernel.mName = std::string(Func.getName());
Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
emitKernelLanguage(Func);
emitKernelAttrs(Func);
- emitKernelArgs(Func);
+ emitKernelArgs(Func, ST);
HSAMetadata.mKernels.back().mCodeProps = CodeProps;
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
}
@@ -673,13 +674,14 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
}
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
+ const GCNSubtarget &ST,
msgpack::MapDocNode Kern) {
unsigned Offset = 0;
auto Args = HSAMetadataDoc->getArrayNode();
for (auto &Arg : Func.args())
emitKernelArg(Arg, Offset, Args);
- emitHiddenKernelArgs(Func, Offset, Args);
+ emitHiddenKernelArgs(Func, ST, Offset, Args);
Kern[".args"] = Args;
}
@@ -791,11 +793,10 @@ void MetadataStreamerV3::emitKernelArg(
}
void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
+ const GCNSubtarget &ST,
unsigned &Offset,
msgpack::ArrayDocNode Args) {
- int HiddenArgNumBytes =
- getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
-
+ unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
return;
@@ -912,6 +913,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
auto Kern = getHSAKernelProps(MF, ProgramInfo);
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
@@ -925,7 +927,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
emitKernelAttrs(Func, Kern);
- emitKernelArgs(Func, Kern);
+ emitKernelArgs(Func, ST, Kern);
}
Kernels.push_back(Kern);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index af5dae1cd8c0..54ed0afbba6d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -30,6 +30,7 @@ class MDNode;
class Module;
struct SIProgramInfo;
class Type;
+class GCNSubtarget;
namespace AMDGPU {
@@ -86,7 +87,8 @@ protected:
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
- void emitKernelArgs(const Function &Func, msgpack::MapDocNode Kern);
+ void emitKernelArgs(const Function &Func, const GCNSubtarget &ST,
+ msgpack::MapDocNode Kern);
void emitKernelArg(const Argument &Arg, unsigned &Offset,
msgpack::ArrayDocNode Args);
@@ -98,8 +100,8 @@ protected:
StringRef BaseTypeName = "", StringRef AccQual = "",
StringRef TypeQual = "");
- void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
- msgpack::ArrayDocNode Args);
+ void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST,
+ unsigned &Offset, msgpack::ArrayDocNode Args);
msgpack::DocNode &getRootMetadata(StringRef Key) {
return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key];
@@ -173,7 +175,7 @@ private:
void emitKernelAttrs(const Function &Func);
- void emitKernelArgs(const Function &Func);
+ void emitKernelArgs(const Function &Func, const GCNSubtarget &ST);
void emitKernelArg(const Argument &Arg);
@@ -183,7 +185,7 @@ private:
StringRef BaseTypeName = "", StringRef AccQual = "",
StringRef TypeQual = "");
- void emitHiddenKernelArgs(const Function &Func);
+ void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST);
const Metadata &getHSAMetadata() const {
return HSAMetadata;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 88b4ec53a2a0..db84b8766924 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -892,6 +892,15 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
}
break;
}
+ case Intrinsic::amdgcn_is_shared:
+ case Intrinsic::amdgcn_is_private: {
+ if (isa<UndefValue>(II.getArgOperand(0)))
+ return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+
+ if (isa<ConstantPointerNull>(II.getArgOperand(0)))
+ return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
+ break;
+ }
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 1f898f2ba8b3..5046daaed977 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -533,7 +533,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16, V2S16})
.minScalar(0, S16)
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
.widenScalarToNextMultipleOf(0, 32)
.maxScalar(0, S32)
.scalarize(0);
@@ -541,7 +541,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
.legalFor({S32, S16, V2S16}) // Clamp modifier
.minScalarOrElt(0, S16)
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.widenScalarToNextPow2(0, 32)
.lower();
@@ -712,7 +712,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
if (ST.hasVOP3PInsts())
- FPOpActions.clampMaxNumElements(0, S16, 2);
+ FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
FPOpActions
.scalarize(0)
@@ -728,7 +728,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_FNEG, G_FABS})
.legalFor(FPTypesPK16)
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.clampScalar(0, S16, S64);
@@ -965,7 +965,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({S16, S32, V2S16})
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
// FIXME: Fixing non-power-of-2 before clamp is workaround for
// narrowScalar limitation.
.widenScalarToNextPow2(0)
@@ -1052,10 +1052,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Split vector extloads.
unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
- unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
-
- if (MemSize < DstTy.getSizeInBits())
- MemSize = std::max(MemSize, AlignBits);
if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
return true;
@@ -1077,12 +1073,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return true;
}
- if (AlignBits < MemSize) {
- const SITargetLowering *TLI = ST.getTargetLowering();
- return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
- Align(AlignBits / 8));
- }
-
return false;
};
@@ -1176,20 +1166,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (DstSize > MemSize)
return std::make_pair(0, LLT::scalar(MemSize));
- if (!isPowerOf2_32(DstSize)) {
- // We're probably decomposing an odd sized store. Try to split
- // to the widest type. TODO: Account for alignment. As-is it
- // should be OK, since the new parts will be further legalized.
- unsigned FloorSize = PowerOf2Floor(DstSize);
- return std::make_pair(0, LLT::scalar(FloorSize));
- }
-
- if (DstSize > 32 && (DstSize % 32 != 0)) {
- // FIXME: Need a way to specify non-extload of larger size if
- // suitably aligned.
- return std::make_pair(0, LLT::scalar(32 * (DstSize / 32)));
- }
-
unsigned MaxSize = maxSizeForAddrSpace(ST,
PtrTy.getAddressSpace(),
Op == G_LOAD);
@@ -1257,14 +1233,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
ElementCount::getFixed(FloorSize / EltSize), EltTy));
}
- // Need to split because of alignment.
- unsigned Align = Query.MMODescrs[0].AlignInBits;
- if (EltSize > Align &&
- (EltSize / Align < DstTy.getNumElements())) {
- return std::make_pair(
- 0, LLT::fixed_vector(EltSize / Align, EltTy));
- }
-
// May need relegalization for the scalars.
return std::make_pair(0, EltTy);
})
@@ -1457,6 +1425,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// FIXME: Doesn't handle extract of illegal sizes.
getActionDefinitionsBuilder(Op)
.lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)))
+ .lowerIf([=](const LegalityQuery &Query) {
+ // Sub-vector (or single element) insert and extract.
+ // TODO: verify immediate offset here since lower only works with
+ // whole elements.
+ const LLT BigTy = Query.Types[BigTyIdx];
+ return BigTy.isVector();
+ })
// FIXME: Multiples of 16 should not be legal.
.legalIf([=](const LegalityQuery &Query) {
const LLT BigTy = Query.Types[BigTyIdx];
@@ -1615,7 +1590,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Prefer to reduce vector widths for 16-bit vectors before lowering, to
// get more vector shift opportunities, since we'll get those when
// expanded.
- .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16));
+ .clampMaxNumElementsStrict(0, S16, 2);
} else if (ST.has16BitInsts()) {
SextInReg.lowerFor({{S32}, {S64}, {S16}});
} else {
@@ -1637,14 +1612,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_FSHR)
.legalFor({{S32, S32}})
.lowerFor({{V2S16, V2S16}})
- .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.lower();
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder(G_FSHL)
.lowerFor({{V2S16, V2S16}})
- .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.lower();
} else {
@@ -2567,10 +2542,8 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
} else {
// For cases where the widened type isn't a nice register value, unmerge
// from a widened register (e.g. <3 x s16> -> <4 x s16>)
- B.setInsertPt(B.getMBB(), ++B.getInsertPt());
- WideLoad = Helper.widenWithUnmerge(WideTy, ValReg);
- B.setInsertPt(B.getMBB(), MI.getIterator());
- B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0);
+ WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
+ B.buildDeleteTrailingVectorElements(ValReg, WideLoad);
}
}
@@ -3843,6 +3816,10 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
llvm_unreachable("invalid data type");
}
+ if (StoreVT == LLT::fixed_vector(3, S16)) {
+ Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg)
+ .getReg(0);
+ }
return Reg;
}
@@ -4237,8 +4214,17 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI,
(I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) ||
(I >= Intr->CoordStart && !IsA16)) {
// Handle any gradient or coordinate operands that should not be packed
- AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
- PackedAddrs.push_back(AddrReg);
+ if ((I < Intr->GradientStart) && IsA16 &&
+ (B.getMRI()->getType(AddrReg) == S16)) {
+ // Special handling of bias when A16 is on. Bias is of type half but
+ // occupies full 32-bit.
+ PackedAddrs.push_back(
+ B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)})
+ .getReg(0));
+ } else {
+ AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
+ PackedAddrs.push_back(AddrReg);
+ }
} else {
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,
// derivatives dx/dh and dx/dv are packed with undef.
@@ -4676,9 +4662,23 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// Deal with the one annoying legal case.
const LLT V3S16 = LLT::fixed_vector(3, 16);
if (Ty == V3S16) {
- padWithUndef(ResTy, RegsToCover - ResultRegs.size() + 1);
- auto Concat = B.buildConcatVectors(LLT::fixed_vector(6, 16), ResultRegs);
- B.buildUnmerge({DstReg, MRI->createGenericVirtualRegister(V3S16)}, Concat);
+ if (IsTFE) {
+ if (ResultRegs.size() == 1) {
+ NewResultReg = ResultRegs[0];
+ } else if (ResultRegs.size() == 2) {
+ LLT V4S16 = LLT::fixed_vector(4, 16);
+ NewResultReg = B.buildConcatVectors(V4S16, ResultRegs).getReg(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (MRI->getType(DstReg).getNumElements() <
+ MRI->getType(NewResultReg).getNumElements()) {
+ B.buildDeleteTrailingVectorElements(DstReg, NewResultReg);
+ } else {
+ B.buildPadVectorWithUndefElements(DstReg, NewResultReg);
+ }
return true;
}
@@ -4869,8 +4869,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
}
Ops.push_back(RayExtent);
- auto packLanes = [&Ops, &S32, &B] (Register Src) {
- auto Unmerge = B.buildUnmerge({S32, S32, S32, S32}, Src);
+ auto packLanes = [&Ops, &S32, &B](Register Src) {
+ auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
Ops.push_back(Unmerge.getReg(0));
Ops.push_back(Unmerge.getReg(1));
Ops.push_back(Unmerge.getReg(2));
@@ -4878,8 +4878,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
packLanes(RayOrigin);
if (IsA16) {
- auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16, S16}, RayDir);
- auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16, S16}, RayInvDir);
+ auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir);
+ auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir);
Register R1 = MRI.createGenericVirtualRegister(S32);
Register R2 = MRI.createGenericVirtualRegister(S32);
Register R3 = MRI.createGenericVirtualRegister(S32);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 12d6d35a6917..6e2b5dc471bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -24,13 +24,6 @@
// A possible future refinement is to specialise the structure per-kernel, so
// that fields can be elided based on more expensive analysis.
//
-// NOTE: Since this pass will directly pack LDS (assume large LDS) into a struct
-// type which would cause allocating huge memory for struct instance within
-// every kernel. Hence, before running this pass, it is advisable to run the
-// pass "amdgpu-replace-lds-use-with-pointer" which will replace LDS uses within
-// non-kernel functions by pointers and thereby minimizes the unnecessary per
-// kernel allocation of LDS memory.
-//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
@@ -62,6 +55,20 @@ static cl::opt<bool> SuperAlignLDSGlobals(
namespace {
+SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
+ SmallPtrSet<GlobalValue *, 32> UsedList;
+
+ SmallVector<GlobalValue *, 32> TmpVec;
+ collectUsedGlobalVariables(M, TmpVec, true);
+ UsedList.insert(TmpVec.begin(), TmpVec.end());
+
+ TmpVec.clear();
+ collectUsedGlobalVariables(M, TmpVec, false);
+ UsedList.insert(TmpVec.begin(), TmpVec.end());
+
+ return UsedList;
+}
+
class AMDGPULowerModuleLDS : public ModulePass {
static void removeFromUsedList(Module &M, StringRef Name,
@@ -105,11 +112,9 @@ class AMDGPULowerModuleLDS : public ModulePass {
removeFromUsedLists(Module &M,
const std::vector<GlobalVariable *> &LocalVars) {
SmallPtrSet<Constant *, 32> LocalVarsSet;
- for (size_t I = 0; I < LocalVars.size(); I++) {
- if (Constant *C = dyn_cast<Constant>(LocalVars[I]->stripPointerCasts())) {
+ for (GlobalVariable *LocalVar : LocalVars)
+ if (Constant *C = dyn_cast<Constant>(LocalVar->stripPointerCasts()))
LocalVarsSet.insert(C);
- }
- }
removeFromUsedList(M, "llvm.used", LocalVarsSet);
removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet);
}
@@ -158,9 +163,9 @@ public:
}
bool runOnModule(Module &M) override {
- UsedList = AMDGPU::getUsedList(M);
-
- bool Changed = processUsedLDS(M);
+ UsedList = getUsedList(M);
+ bool Changed = superAlignLDSGlobals(M);
+ Changed |= processUsedLDS(M);
for (Function &F : M.functions()) {
if (F.isDeclaration())
@@ -177,6 +182,50 @@ public:
}
private:
+ // Increase the alignment of LDS globals if necessary to maximise the chance
+ // that we can use aligned LDS instructions to access them.
+ static bool superAlignLDSGlobals(Module &M) {
+ const DataLayout &DL = M.getDataLayout();
+ bool Changed = false;
+ if (!SuperAlignLDSGlobals) {
+ return Changed;
+ }
+
+ for (auto &GV : M.globals()) {
+ if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
+ // Only changing alignment of LDS variables
+ continue;
+ }
+ if (!GV.hasInitializer()) {
+ // cuda/hip extern __shared__ variable, leave alignment alone
+ continue;
+ }
+
+ Align Alignment = AMDGPU::getAlign(DL, &GV);
+ TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());
+
+ if (GVSize > 8) {
+ // We might want to use a b96 or b128 load/store
+ Alignment = std::max(Alignment, Align(16));
+ } else if (GVSize > 4) {
+ // We might want to use a b64 load/store
+ Alignment = std::max(Alignment, Align(8));
+ } else if (GVSize > 2) {
+ // We might want to use a b32 load/store
+ Alignment = std::max(Alignment, Align(4));
+ } else if (GVSize > 1) {
+ // We might want to use a b16 load/store
+ Alignment = std::max(Alignment, Align(2));
+ }
+
+ if (Alignment != AMDGPU::getAlign(DL, &GV)) {
+ Changed = true;
+ GV.setAlignment(Alignment);
+ }
+ }
+ return Changed;
+ }
+
bool processUsedLDS(Module &M, Function *F = nullptr) {
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
@@ -190,31 +239,6 @@ private:
return false;
}
- // Increase the alignment of LDS globals if necessary to maximise the chance
- // that we can use aligned LDS instructions to access them.
- if (SuperAlignLDSGlobals) {
- for (auto *GV : FoundLocalVars) {
- Align Alignment = AMDGPU::getAlign(DL, GV);
- TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType());
-
- if (GVSize > 8) {
- // We might want to use a b96 or b128 load/store
- Alignment = std::max(Alignment, Align(16));
- } else if (GVSize > 4) {
- // We might want to use a b64 load/store
- Alignment = std::max(Alignment, Align(8));
- } else if (GVSize > 2) {
- // We might want to use a b32 load/store
- Alignment = std::max(Alignment, Align(4));
- } else if (GVSize > 1) {
- // We might want to use a b16 load/store
- Alignment = std::max(Alignment, Align(2));
- }
-
- GV->setAlignment(Alignment);
- }
- }
-
SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
LayoutFields.reserve(FoundLocalVars.size());
for (GlobalVariable *GV : FoundLocalVars) {
@@ -343,20 +367,14 @@ private:
refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
}
- // Mark kernels with asm that reads the address of the allocated structure
- // This is not necessary for lowering. This lets other passes, specifically
- // PromoteAlloca, accurately calculate how much LDS will be used by the
- // kernel after lowering.
+ // This ensures the variable is allocated when called functions access it.
+ // It also lets other passes, specifically PromoteAlloca, accurately
+ // calculate how much LDS will be used by the kernel after lowering.
if (!F) {
IRBuilder<> Builder(Ctx);
- SmallPtrSet<Function *, 32> Kernels;
for (Function &Func : M.functions()) {
- if (Func.isDeclaration())
- continue;
-
- if (AMDGPU::isKernelCC(&Func) && !Kernels.contains(&Func)) {
+ if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
markUsedByKernel(Builder, &Func, SGV);
- Kernels.insert(&Func);
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 5d4b007f11e6..4e2f98d2a5db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -2786,12 +2786,8 @@ AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned Selec
// Fixme: Move linearization creation to the original spot
createLinearizedRegion(Region, SelectOut);
- for (auto CI = Region->getChildren()->begin(),
- CE = Region->getChildren()->end();
- CI != CE; ++CI) {
- InnerSelectOut =
- initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII);
- }
+ for (auto *CI : *Region->getChildren())
+ InnerSelectOut = initializeSelectRegisters(CI, InnerSelectOut, MRI, TII);
MRT->setBBSelectRegIn(InnerSelectOut);
return InnerSelectOut;
} else {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 2aa02299ecdc..8ad344816ad2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -119,31 +119,27 @@ private:
bool isConstantAddr(const Value *V) const;
};
-static const Value *getMemoryInstrPtr(const Instruction *Inst) {
- if (auto LI = dyn_cast<LoadInst>(Inst)) {
- return LI->getPointerOperand();
- }
- if (auto SI = dyn_cast<StoreInst>(Inst)) {
- return SI->getPointerOperand();
- }
- if (auto AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
- return AI->getPointerOperand();
- }
- if (auto AI = dyn_cast<AtomicRMWInst>(Inst)) {
- return AI->getPointerOperand();
- }
- if (auto MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
- return MI->getRawDest();
- }
+static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType(
+ const Instruction *Inst) {
+ if (auto LI = dyn_cast<LoadInst>(Inst))
+ return {LI->getPointerOperand(), LI->getType()};
+ if (auto SI = dyn_cast<StoreInst>(Inst))
+ return {SI->getPointerOperand(), SI->getValueOperand()->getType()};
+ if (auto AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return {AI->getPointerOperand(), AI->getCompareOperand()->getType()};
+ if (auto AI = dyn_cast<AtomicRMWInst>(Inst))
+ return {AI->getPointerOperand(), AI->getValOperand()->getType()};
+ if (auto MI = dyn_cast<AnyMemIntrinsic>(Inst))
+ return {MI->getRawDest(), Type::getInt8Ty(MI->getContext())};
- return nullptr;
+ return {nullptr, nullptr};
}
bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
SmallSet<const Value *, 32> WorkSet;
SmallSet<const Value *, 32> Visited;
- if (const Value *MO = getMemoryInstrPtr(Inst)) {
+ if (const Value *MO = getMemoryInstrPtrAndType(Inst).first) {
if (isGlobalAddr(MO))
WorkSet.insert(MO);
}
@@ -209,10 +205,8 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
for (auto &B : F) {
LastAccess = MemAccessInfo();
for (auto &I : B) {
- if (const Value *Ptr = getMemoryInstrPtr(&I)) {
- unsigned Size = divideCeil(
- Ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
- 32);
+ if (const Type *Ty = getMemoryInstrPtrAndType(&I).second) {
+ unsigned Size = divideCeil(Ty->getPrimitiveSizeInBits(), 32);
if (isIndirectAccess(&I))
FI.IAMInstCost += Size;
if (isLargeStride(&I))
@@ -326,7 +320,7 @@ bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
AMDGPUPerfHint::MemAccessInfo
AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
MemAccessInfo MAI;
- const Value *MO = getMemoryInstrPtr(Inst);
+ const Value *MO = getMemoryInstrPtrAndType(Inst).first;
LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *MO << '\n');
// Do not treat local-addr memory access as large stride.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 3ec5dd7e0eff..f9a9fe403ff6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -939,7 +939,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
GlobalVariable::NotThreadLocal,
AMDGPUAS::LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(MaybeAlign(I.getAlignment()));
+ GV->setAlignment(I.getAlign());
Value *TCntY, *TCntZ;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 12b5830ef930..3ce67a733c10 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -16,6 +16,7 @@
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -23,6 +24,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-regbank-combiner"
@@ -36,13 +38,15 @@ protected:
MachineRegisterInfo &MRI;
const RegisterBankInfo &RBI;
const TargetRegisterInfo &TRI;
+ const SIInstrInfo &TII;
CombinerHelper &Helper;
public:
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()),
RBI(*MF.getSubtarget().getRegBankInfo()),
- TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
+ TRI(*MF.getSubtarget().getRegisterInfo()),
+ TII(*MF.getSubtarget<GCNSubtarget>().getInstrInfo()), Helper(Helper){};
bool isVgprRegBank(Register Reg);
Register getAsVgpr(Register Reg);
@@ -63,7 +67,19 @@ public:
Register &Val, CstTy &K0, CstTy &K1);
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+ bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+ bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg);
+ bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg);
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+ void applyClamp(MachineInstr &MI, Register &Reg);
+
+private:
+ AMDGPU::SIModeRegisterDefaults getMode();
+ bool getIEEE();
+ bool getDX10Clamp();
+ bool isFminnumIeee(const MachineInstr &MI);
+ bool isFCst(MachineInstr *MI);
+ bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1);
};
bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
@@ -98,6 +114,13 @@ AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
case AMDGPU::G_UMAX:
case AMDGPU::G_UMIN:
return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM:
+ return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FMINNUM_IEEE:
+ return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
+ AMDGPU::G_AMDGPU_FMED3};
}
}
@@ -148,6 +171,146 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
return true;
}
+// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
+// ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
+// ieee = false : min/max(NaN, K) = K
+// clamp(NaN) = dx10_clamp ? 0.0 : NaN
+// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
+// Other operand commutes (see matchMed) give same result since min and max are
+// commutative.
+
+// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
+// with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
+// Val = SNaN only for ieee = true
+// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
+// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
+// max(min(SNaN, K1), K0) = max(K1, K0) = K1
+// Val = NaN,ieee = false or Val = QNaN,ieee = true
+// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
+// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
+// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
+bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
+ MachineInstr &MI, Med3MatchInfo &MatchInfo) {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (Ty != LLT::scalar(16) && Ty != LLT::scalar(32))
+ return false;
+
+ auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
+
+ Register Val;
+ Optional<FPValueAndVReg> K0, K1;
+ // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
+ if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
+ return false;
+
+ if (K0->Value > K1->Value)
+ return false;
+
+ // For IEEE=false perform combine only when it's safe to assume that there are
+ // no NaN inputs. Most often MI is marked with nnan fast math flag.
+ // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
+ // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
+// nodes (max/min) have same behavior when one input is NaN and other isn't.
+ // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
+ // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
+ if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
+ // Don't fold single use constant that can't be inlined.
+ if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
+ (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
+ MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI,
+ Register &Reg) {
+ // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
+ auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
+ Register Val;
+ Optional<FPValueAndVReg> K0, K1;
+ // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
+ if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
+ return false;
+
+ if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
+ return false;
+
+ // For IEEE=false perform combine only when it's safe to assume that there are
+ // no NaN inputs. Most often MI is marked with nnan fast math flag.
+ // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
+ // to 0.0 requires dx10_clamp = true.
+ if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
+ isKnownNeverSNaN(Val, MRI)) ||
+ isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
+ Reg = Val;
+ return true;
+ }
+
+ return false;
+}
+
+// Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
+// Val = SNaN only for ieee = true. It is important which operand is NaN.
+// min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
+// min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
+// min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
+// Val = NaN,ieee = false or Val = QNaN,ieee = true
+// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
+// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
+// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
+bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI,
+ Register &Reg) {
+ if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
+ return false;
+
+ // In llvm-ir, clamp is often represented as an intrinsic call to
+ // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
+ MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+ MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
+ MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
+
+ if (isFCst(Src0) && !isFCst(Src1))
+ std::swap(Src0, Src1);
+ if (isFCst(Src1) && !isFCst(Src2))
+ std::swap(Src1, Src2);
+ if (isFCst(Src0) && !isFCst(Src1))
+ std::swap(Src0, Src1);
+ if (!isClampZeroToOne(Src1, Src2))
+ return false;
+
+ Register Val = Src0->getOperand(0).getReg();
+
+ auto isOp3Zero = [&]() {
+ MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
+ if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
+ return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
+ return false;
+ };
+ // For IEEE=false perform combine only when it's safe to assume that there are
+ // no NaN inputs. Most often MI is marked with nnan fast math flag.
+ // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
+ // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
+ if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
+ (getIEEE() && getDX10Clamp() &&
+ (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
+ Reg = Val;
+ return true;
+ }
+
+ return false;
+}
+
+void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) {
+ B.setInstrAndDebugLoc(MI);
+ B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
+ MI.getFlags());
+ MI.eraseFromParent();
+}
+
void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
Med3MatchInfo &MatchInfo) {
B.setInstrAndDebugLoc(MI);
@@ -158,6 +321,33 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
MI.eraseFromParent();
}
+AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
+ return MF.getInfo<SIMachineFunctionInfo>()->getMode();
+}
+
+bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
+
+bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
+
+bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
+ return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
+}
+
+bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
+ return MI->getOpcode() == AMDGPU::G_FCONSTANT;
+}
+
+bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
+ MachineInstr *K1) {
+ if (isFCst(K0) && isFCst(K1)) {
+ const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
+ const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
+ return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
+ (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
+ }
+ return false;
+}
+
class AMDGPURegBankCombinerHelperState {
protected:
CombinerHelper &Helper;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 5988403c0a29..c60012bcfe2e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -707,9 +707,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
iterator_range<MachineBasicBlock::iterator> Range,
SmallSet<Register, 4> &SGPROperandRegs,
MachineRegisterInfo &MRI) const {
- SmallVector<Register, 4> ResultRegs;
- SmallVector<Register, 4> InitResultRegs;
- SmallVector<Register, 4> PhiRegs;
// Track use registers which have already been expanded with a readfirstlane
// sequence. This may have multiple uses if moving a sequence.
@@ -774,15 +771,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
.addReg(NewExec)
.addMBB(LoopBB);
- for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
- B.buildInstr(TargetOpcode::G_PHI)
- .addDef(std::get<2>(Result))
- .addReg(std::get<0>(Result)) // Initial value / implicit_def
- .addMBB(&MBB)
- .addReg(std::get<1>(Result)) // Mid-loop value.
- .addMBB(LoopBB);
- }
-
const DebugLoc &DL = B.getDL();
MachineInstr &FirstInst = *Range.begin();
@@ -1174,18 +1162,25 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
// 96-bit loads are only available for vector loads. We need to split this
// into a 64-bit part, and 32 (unless we can widen to a 128-bit load).
if (MMO->getAlign() < Align(16)) {
+ MachineFunction *MF = MI.getParent()->getParent();
+ ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
+ MachineIRBuilder B(MI, ApplyBank);
+ LegalizerHelper Helper(*MF, ApplyBank, B);
LLT Part64, Part32;
std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
- auto Load0 = B.buildLoadFromOffset(Part64, PtrReg, *MMO, 0);
- auto Load1 = B.buildLoadFromOffset(Part32, PtrReg, *MMO, 8);
-
- auto Undef = B.buildUndef(LoadTy);
- auto Ins0 = B.buildInsert(LoadTy, Undef, Load0, 0);
- B.buildInsert(MI.getOperand(0), Ins0, Load1, 64);
+ if (Helper.reduceLoadStoreWidth(cast<GAnyLoad>(MI), 0, Part64) !=
+ LegalizerHelper::Legalized)
+ return false;
+ return true;
} else {
LLT WiderTy = widen96To128(LoadTy);
auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
- B.buildExtract(MI.getOperand(0), WideLoad, 0);
+ if (WiderTy.isScalar())
+ B.buildTrunc(MI.getOperand(0), WideLoad);
+ else {
+ B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),
+ WideLoad);
+ }
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
index d55bf3917e9c..2475b44b42a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@@ -87,6 +87,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -110,6 +111,18 @@ using namespace llvm;
namespace {
+namespace AMDGPU {
+/// Collect all the instructions where user \p U belongs to. \p U could be
+/// instruction itself or it could be a constant expression which is used within
+/// an instruction. If \p CollectKernelInsts is true, collect instructions only
+/// from kernels, otherwise collect instructions only from non-kernel functions.
+DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
+getFunctionToInstsMap(User *U, bool CollectKernelInsts);
+
+SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);
+
+} // namespace AMDGPU
+
class ReplaceLDSUseImpl {
Module &M;
LLVMContext &Ctx;
@@ -127,7 +140,8 @@ class ReplaceLDSUseImpl {
// Collect LDS which requires their uses to be replaced by pointer.
std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() {
// Collect LDS which requires module lowering.
- std::vector<GlobalVariable *> LDSGlobals = AMDGPU::findVariablesToLower(M);
+ std::vector<GlobalVariable *> LDSGlobals =
+ llvm::AMDGPU::findVariablesToLower(M);
// Remove LDS which don't qualify for replacement.
llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
@@ -172,7 +186,7 @@ class ReplaceLDSUseImpl {
AMDGPUAS::LOCAL_ADDRESS);
LDSPointer->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- LDSPointer->setAlignment(AMDGPU::getAlign(DL, LDSPointer));
+ LDSPointer->setAlignment(llvm::AMDGPU::getAlign(DL, LDSPointer));
// Mark that an associated LDS pointer is created for LDS.
LDSToPointer[GV] = LDSPointer;
@@ -245,10 +259,9 @@ class ReplaceLDSUseImpl {
auto FunctionToInsts =
AMDGPU::getFunctionToInstsMap(U, false /*=CollectKernelInsts*/);
- for (auto FI = FunctionToInsts.begin(), FE = FunctionToInsts.end();
- FI != FE; ++FI) {
- Function *F = FI->first;
- auto &Insts = FI->second;
+ for (const auto &FunctionToInst : FunctionToInsts) {
+ Function *F = FunctionToInst.first;
+ auto &Insts = FunctionToInst.second;
for (auto *I : Insts) {
// If `U` is a constant expression, then we need to break the
// associated instruction into a set of separate instructions by
@@ -341,10 +354,9 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
// Traverse through each kernel K, check and if required, initialize the
// LDS pointer to point to LDS within K.
- for (auto KI = KernelToCallees.begin(), KE = KernelToCallees.end(); KI != KE;
- ++KI) {
- Function *K = KI->first;
- SmallPtrSet<Function *, 8> Callees = KI->second;
+ for (const auto &KernelToCallee : KernelToCallees) {
+ Function *K = KernelToCallee.first;
+ SmallPtrSet<Function *, 8> Callees = KernelToCallee.second;
// Compute reachable and LDS used callees for kernel K.
set_intersect(Callees, LDSAccessors);
@@ -378,6 +390,184 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
return true;
}
+namespace AMDGPU {
+
+// A helper class for collecting all reachable callees for each kernel defined
+// within the module.
+class CollectReachableCallees {
+ Module &M;
+ CallGraph CG;
+ SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;
+
+ // Collect all address taken functions within the module.
+ void collectAddressTakenFunctions() {
+ auto *ECNode = CG.getExternalCallingNode();
+
+ for (const auto &GI : *ECNode) {
+ auto *CGN = GI.second;
+ auto *F = CGN->getFunction();
+ if (!F || F->isDeclaration() || llvm::AMDGPU::isKernelCC(F))
+ continue;
+ AddressTakenFunctions.insert(CGN);
+ }
+ }
+
+ // For given kernel, collect all its reachable non-kernel functions.
+ SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
+ SmallPtrSet<Function *, 8> ReachableCallees;
+
+ // Call graph node which represents this kernel.
+ auto *KCGN = CG[K];
+
+ // Go through all call graph nodes reachable from the node representing this
+ // kernel, visit all their call sites, if the call site is direct, add
+ // corresponding callee to reachable callee set, if it is indirect, resolve
+ // the indirect call site to potential reachable callees, add them to
+ // reachable callee set, and repeat the process for the newly added
+ // potential callee nodes.
+ //
+ // FIXME: Need to handle bit-casted function pointers.
+ //
+ SmallVector<CallGraphNode *, 8> CGNStack(depth_first(KCGN));
+ SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
+ while (!CGNStack.empty()) {
+ auto *CGN = CGNStack.pop_back_val();
+
+ if (!VisitedCGNodes.insert(CGN).second)
+ continue;
+
+ // Ignore call graph node which does not have associated function or
+ // associated function is not a definition.
+ if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
+ continue;
+
+ for (const auto &GI : *CGN) {
+ auto *RCB = cast<CallBase>(GI.first.getValue());
+ auto *RCGN = GI.second;
+
+ if (auto *DCallee = RCGN->getFunction()) {
+ ReachableCallees.insert(DCallee);
+ } else if (RCB->isIndirectCall()) {
+ auto *RCBFTy = RCB->getFunctionType();
+ for (auto *ACGN : AddressTakenFunctions) {
+ auto *ACallee = ACGN->getFunction();
+ if (ACallee->getFunctionType() == RCBFTy) {
+ ReachableCallees.insert(ACallee);
+ CGNStack.append(df_begin(ACGN), df_end(ACGN));
+ }
+ }
+ }
+ }
+ }
+
+ return ReachableCallees;
+ }
+
+public:
+ explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
+ // Collect address taken functions.
+ collectAddressTakenFunctions();
+ }
+
+ void collectReachableCallees(
+ DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
+ // Collect reachable callee set for each kernel defined in the module.
+ for (Function &F : M.functions()) {
+ if (!llvm::AMDGPU::isKernelCC(&F))
+ continue;
+ Function *K = &F;
+ KernelToCallees[K] = collectReachableCallees(K);
+ }
+ }
+};
+
+/// Collect reachable callees for each kernel defined in the module \p M and
+/// return collected callees at \p KernelToCallees.
+void collectReachableCallees(
+ Module &M,
+ DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
+ CollectReachableCallees CRC{M};
+ CRC.collectReachableCallees(KernelToCallees);
+}
+
+/// For the given LDS global \p GV, visit all its users and collect all
+/// non-kernel functions within which \p GV is used and return collected list of
+/// such non-kernel functions.
+SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) {
+ SmallPtrSet<Function *, 8> LDSAccessors;
+ SmallVector<User *, 8> UserStack(GV->users());
+ SmallPtrSet<User *, 8> VisitedUsers;
+
+ while (!UserStack.empty()) {
+ auto *U = UserStack.pop_back_val();
+
+ // `U` is already visited? continue to next one.
+ if (!VisitedUsers.insert(U).second)
+ continue;
+
+ // `U` is a global variable which is initialized with LDS. Ignore LDS.
+ if (isa<GlobalValue>(U))
+ return SmallPtrSet<Function *, 8>();
+
+ // Recursively explore constant users.
+ if (isa<Constant>(U)) {
+ append_range(UserStack, U->users());
+ continue;
+ }
+
+ // `U` should be an instruction, if it belongs to a non-kernel function F,
+ // then collect F.
+ Function *F = cast<Instruction>(U)->getFunction();
+ if (!llvm::AMDGPU::isKernelCC(F))
+ LDSAccessors.insert(F);
+ }
+
+ return LDSAccessors;
+}
+
+DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
+getFunctionToInstsMap(User *U, bool CollectKernelInsts) {
+ DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts;
+ SmallVector<User *, 8> UserStack;
+ SmallPtrSet<User *, 8> VisitedUsers;
+
+ UserStack.push_back(U);
+
+ while (!UserStack.empty()) {
+ auto *UU = UserStack.pop_back_val();
+
+ if (!VisitedUsers.insert(UU).second)
+ continue;
+
+ if (isa<GlobalValue>(UU))
+ continue;
+
+ if (isa<Constant>(UU)) {
+ append_range(UserStack, UU->users());
+ continue;
+ }
+
+ auto *I = cast<Instruction>(UU);
+ Function *F = I->getFunction();
+ if (CollectKernelInsts) {
+ if (!llvm::AMDGPU::isKernelCC(F)) {
+ continue;
+ }
+ } else {
+ if (llvm::AMDGPU::isKernelCC(F)) {
+ continue;
+ }
+ }
+
+ FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>()));
+ FunctionToInsts[F].insert(I);
+ }
+
+ return FunctionToInsts;
+}
+
+} // namespace AMDGPU
+
// Entry-point function which interface ReplaceLDSUseImpl with outside of the
// class.
bool ReplaceLDSUseImpl::replaceLDSUse() {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0655b4342ba1..cd05797fdbdb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -413,21 +413,21 @@ bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
case AMDGPU::V_MAX_I16_e32:
case AMDGPU::V_MIN_I16_e64:
case AMDGPU::V_MIN_I16_e32:
+ case AMDGPU::V_MAD_F16_e64:
+ case AMDGPU::V_MAD_U16_e64:
+ case AMDGPU::V_MAD_I16_e64:
+ case AMDGPU::V_FMA_F16_e64:
+ case AMDGPU::V_DIV_FIXUP_F16_e64:
// On gfx10, all 16-bit instructions preserve the high bits.
return getGeneration() <= AMDGPUSubtarget::GFX9;
- case AMDGPU::V_MAD_F16_e64:
case AMDGPU::V_MADAK_F16:
case AMDGPU::V_MADMK_F16:
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_MAC_F16_e32:
case AMDGPU::V_FMAMK_F16:
case AMDGPU::V_FMAAK_F16:
- case AMDGPU::V_MAD_U16_e64:
- case AMDGPU::V_MAD_I16_e64:
- case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMAC_F16_e64:
case AMDGPU::V_FMAC_F16_e32:
- case AMDGPU::V_DIV_FIXUP_F16_e64:
// In gfx9, the preferred handling of the unused high 16-bits changed. Most
// instructions maintain the legacy behavior of 0ing. Some instructions
// changed to preserving the high bits.
@@ -648,9 +648,18 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
}
unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
+ assert(AMDGPU::isKernel(F.getCallingConv()));
+
+ // We don't allocate the segment if we know the implicit arguments weren't
+ // used, even if the ABI implies we need them.
+ if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
+ return 0;
+
if (isMesaKernel(F))
return 16;
- return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
+
+ // Assume all implicit inputs are used by default
+ return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index de11676279f2..a2c61f9da8da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -231,13 +231,6 @@ static cl::opt<bool, true> LateCFGStructurize(
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
cl::Hidden);
-static cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt(
- "amdgpu-fixed-function-abi",
- cl::desc("Enable all implicit function arguments"),
- cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI),
- cl::init(false),
- cl::Hidden);
-
// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
"amdgpu-simplify-libcall",
@@ -505,7 +498,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
-bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0ff2db2a52d9..226646a96953 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -37,7 +37,6 @@ protected:
public:
static bool EnableLateStructurizeCFG;
static bool EnableFunctionCalls;
- static bool EnableFixedFunctionABI;
static bool EnableLowerModuleLDS;
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index ecdbdf613a53..09c5eb192e1f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -519,57 +519,6 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
- EVT OrigTy = TLI->getValueType(DL, Ty);
- if (!OrigTy.isSimple()) {
- // FIXME: We're having to query the throughput cost so that the basic
- // implementation tries to generate legalize and scalarization costs. Maybe
- // we could hoist the scalarization code here?
- if (CostKind != TTI::TCK_CodeSize)
- return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
- Opd1Info, Opd2Info, Opd1PropInfo,
- Opd2PropInfo, Args, CxtI);
- // Scalarization
-
- // Check if any of the operands are vector operands.
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
-
- bool IsFloat = Ty->isFPOrFPVectorTy();
- // Assume that floating point arithmetic operations cost twice as much as
- // integer operations.
- unsigned OpCost = (IsFloat ? 2 : 1);
-
- if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1.
- // TODO: Once we have extract/insert subvector cost we need to use them.
- return LT.first * OpCost;
- }
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // If the operation is custom lowered, then assume that the code is twice
- // as expensive.
- return LT.first * 2 * OpCost;
- }
-
- // Else, assume that we need to scalarize this op.
- // TODO: If one of the types get legalized by splitting, handle this
- // similarly to what getCastInstrCost() does.
- if (auto *VTy = dyn_cast<VectorType>(Ty)) {
- unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
- InstructionCost Cost = getArithmeticInstrCost(
- Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo, Args, CxtI);
- // Return the cost of multiple scalar invocation plus the cost of
- // inserting and extracting the values.
- SmallVector<Type *> Tys(Args.size(), Ty);
- return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
- }
-
- // We don't know anything about this scalar instruction.
- return OpCost;
- }
// Legalize the type.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -742,40 +691,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
Type *RetTy = ICA.getReturnType();
- EVT OrigTy = TLI->getValueType(DL, RetTy);
- if (!OrigTy.isSimple()) {
- if (CostKind != TTI::TCK_CodeSize)
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
-
- // TODO: Combine these two logic paths.
- if (ICA.isTypeBasedOnly())
- return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
-
- unsigned RetVF =
- (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements()
- : 1);
- const IntrinsicInst *I = ICA.getInst();
- const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
- FastMathFlags FMF = ICA.getFlags();
- // Assume that we need to scalarize this intrinsic.
-
- // Compute the scalarization overhead based on Args for a vector
- // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
- // CostModel will pass a vector RetTy and VF is 1.
- InstructionCost ScalarizationCost = InstructionCost::getInvalid();
- if (RetVF > 1) {
- ScalarizationCost = 0;
- if (!RetTy->isVoidTy())
- ScalarizationCost +=
- getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
- ScalarizationCost +=
- getOperandsScalarizationOverhead(Args, ICA.getArgTypes());
- }
-
- IntrinsicCostAttributes Attrs(ICA.getID(), RetTy, ICA.getArgTypes(), FMF, I,
- ScalarizationCost);
- return getIntrinsicInstrCost(Attrs, CostKind);
- }
// Legalize the type.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 712f6dece911..1736c078eb83 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -173,10 +173,8 @@ protected:
}
static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
- for (MachineLoop::iterator iter = LoopInfo.begin(),
- iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
- (*iter)->print(dbgs());
- }
+ for (const MachineLoop *L : LoopInfo)
+ L->print(dbgs());
}
// UTILITY FUNCTIONS
@@ -691,9 +689,7 @@ bool AMDGPUCFGStructurizer::prepare() {
SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
// Add an ExitBlk to loop that don't have one
- for (MachineLoopInfo::iterator It = MLI->begin(),
- E = MLI->end(); It != E; ++It) {
- MachineLoop *LoopRep = (*It);
+ for (MachineLoop *LoopRep : *MLI) {
MBBVector ExitingMBBs;
LoopRep->getExitingBlocks(ExitingMBBs);
@@ -827,14 +823,13 @@ bool AMDGPUCFGStructurizer::run() {
wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
// Detach retired Block, release memory.
- for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
- It != E; ++It) {
- if ((*It).second && (*It).second->IsRetired) {
- assert(((*It).first)->getNumber() != -1);
- LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
- (*It).first->eraseFromParent(); //Remove from the parent Function.
+ for (auto &It : BlockInfoMap) {
+ if (It.second && It.second->IsRetired) {
+ assert((It.first)->getNumber() != -1);
+ LLVM_DEBUG(dbgs() << "Erase BB" << (It.first)->getNumber() << "\n";);
+ It.first->eraseFromParent(); // Remove from the parent Function.
}
- delete (*It).second;
+ delete It.second;
}
BlockInfoMap.clear();
LLInfoMap.clear();
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 4acd77a9d5d2..2bb59086f391 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -246,8 +246,12 @@ public:
return isRegKind() && !hasModifiers();
}
+ bool isRegOrInline(unsigned RCID, MVT type) const {
+ return isRegClass(RCID) || isInlinableImm(type);
+ }
+
bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
- return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
+ return isRegOrInline(RCID, type) || isLiteralImm(type);
}
bool isRegOrImmWithInt16InputMods() const {
@@ -372,7 +376,7 @@ public:
bool isInlineValue() const;
bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
- return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
+ return isRegOrInline(RCID, type) && !hasModifiers();
}
bool isSCSrcB16() const {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index d3644db7cf8b..a535c8cc0918 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
-def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
-def MUBUFOffset : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
+def MUBUFAddr64 : ComplexPattern<iPTR, 4, "SelectMUBUFAddr64">;
+def MUBUFOffset : ComplexPattern<iPTR, 3, "SelectMUBUFOffset">;
-def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
-def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
+def MUBUFScratchOffen : ComplexPattern<iPTR, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
+def MUBUFScratchOffset : ComplexPattern<iPTR, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
def BUFAddrKind {
int Offset = 0;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index bb0aa648ff90..c7ec5308e6d0 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -6,12 +6,12 @@
//
//===----------------------------------------------------------------------===//
-def FlatOffset : ComplexPattern<i64, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
-def GlobalOffset : ComplexPattern<i64, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
-def ScratchOffset : ComplexPattern<i32, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;
+def FlatOffset : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
+def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
+def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;
-def GlobalSAddr : ComplexPattern<i64, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
-def ScratchSAddr : ComplexPattern<i32, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
+def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
+def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
index f3f664f7972a..912bcc792e4d 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -120,8 +120,7 @@ unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
// We will now look at each of the currently executing instructions
// to find out if this wait instruction still needs to wait.
- for (auto I = IssuedInst.begin(), E = IssuedInst.end(); I != E; I++) {
- const InstRef &PrevIR = *I;
+ for (const InstRef &PrevIR : IssuedInst) {
const Instruction &PrevInst = *PrevIR.getInstruction();
const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 29c37c706138..8a48a67b829c 100644
--- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -440,9 +440,8 @@ private:
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
- for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
- BB->splice(InsertPos, BB, Clause.second[i]);
- }
+ for (MachineInstr *MI : Clause.second)
+ BB->splice(InsertPos, BB, MI);
CfCount += 2 * Clause.second.size();
}
@@ -452,9 +451,8 @@ private:
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
- for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
- BB->splice(InsertPos, BB, Clause.second[i]);
- }
+ for (MachineInstr *MI : Clause.second)
+ BB->splice(InsertPos, BB, MI);
CfCount += Clause.second.size();
}
@@ -635,10 +633,10 @@ public:
CfCount++;
}
MI->eraseFromParent();
- for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
- EmitFetchClause(I, DL, FetchClauses[i], CfCount);
- for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
- EmitALUClause(I, DL, AluClauses[i], CfCount);
+ for (ClauseFile &CF : FetchClauses)
+ EmitFetchClause(I, DL, CF, CfCount);
+ for (ClauseFile &CF : AluClauses)
+ EmitALUClause(I, DL, CF, CfCount);
break;
}
default:
@@ -649,8 +647,7 @@ public:
break;
}
}
- for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
- MachineInstr *Alu = ToPopAfter[i];
+ for (MachineInstr *Alu : ToPopAfter) {
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
TII->get(R600::CF_ALU_POP_AFTER))
.addImm(Alu->getOperand(0).getImm())
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index a7ebf72315cb..aec8b1ae4837 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -268,17 +268,15 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
{R600::OpName::src1_W, R600::OpName::src1_sel_W},
};
- for (unsigned j = 0; j < 8; j++) {
- MachineOperand &MO =
- MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
+ for (const auto &Op : OpTable) {
+ MachineOperand &MO = MI.getOperand(getOperandIdx(MI.getOpcode(), Op[0]));
Register Reg = MO.getReg();
if (Reg == R600::ALU_CONST) {
MachineOperand &Sel =
- MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
+ MI.getOperand(getOperandIdx(MI.getOpcode(), Op[1]));
Result.push_back(std::make_pair(&MO, Sel.getImm()));
continue;
}
-
}
return Result;
}
@@ -289,15 +287,14 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
{R600::OpName::src2, R600::OpName::src2_sel},
};
- for (unsigned j = 0; j < 3; j++) {
- int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]);
+ for (const auto &Op : OpTable) {
+ int SrcIdx = getOperandIdx(MI.getOpcode(), Op[0]);
if (SrcIdx < 0)
break;
MachineOperand &MO = MI.getOperand(SrcIdx);
Register Reg = MO.getReg();
if (Reg == R600::ALU_CONST) {
- MachineOperand &Sel =
- MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
+ MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), Op[1]));
Result.push_back(std::make_pair(&MO, Sel.getImm()));
continue;
}
@@ -521,12 +518,11 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
ValidSwizzle.clear();
unsigned ConstCount;
BankSwizzle TransBS = ALU_VEC_012_SCL_210;
- for (unsigned i = 0, e = IG.size(); i < e; ++i) {
- IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
- unsigned Op = getOperandIdx(IG[i]->getOpcode(),
- R600::OpName::bank_swizzle);
- ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
- IG[i]->getOperand(Op).getImm());
+ for (MachineInstr *MI : IG) {
+ IGSrcs.push_back(ExtractSrcs(*MI, PV, ConstCount));
+ unsigned Op = getOperandIdx(MI->getOpcode(), R600::OpName::bank_swizzle);
+ ValidSwizzle.push_back(
+ (R600InstrInfo::BankSwizzle)MI->getOperand(Op).getImm());
}
std::vector<std::pair<int, unsigned>> TransOps;
if (!isLastAluTrans)
@@ -542,8 +538,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
ALU_VEC_120_SCL_212,
ALU_VEC_102_SCL_221
};
- for (unsigned i = 0; i < 4; i++) {
- TransBS = TransSwz[i];
+ for (R600InstrInfo::BankSwizzle TransBS : TransSwz) {
if (!isConstCompatible(TransBS, TransOps, ConstCount))
continue;
bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
@@ -562,9 +557,9 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
assert (Consts.size() <= 12 && "Too many operands in instructions group");
unsigned Pair1 = 0, Pair2 = 0;
- for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
- unsigned ReadConstHalf = Consts[i] & 2;
- unsigned ReadConstIndex = Consts[i] & (~3);
+ for (unsigned Const : Consts) {
+ unsigned ReadConstHalf = Const & 2;
+ unsigned ReadConstIndex = Const & (~3);
unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
if (!Pair1) {
Pair1 = ReadHalfConst;
@@ -587,12 +582,11 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
const {
std::vector<unsigned> Consts;
SmallSet<int64_t, 4> Literals;
- for (unsigned i = 0, n = MIs.size(); i < n; i++) {
- MachineInstr &MI = *MIs[i];
- if (!isALUInstr(MI.getOpcode()))
+ for (MachineInstr *MI : MIs) {
+ if (!isALUInstr(MI->getOpcode()))
continue;
- for (const auto &Src : getSrcs(MI)) {
+ for (const auto &Src : getSrcs(*MI)) {
if (Src.first->getReg() == R600::ALU_LITERAL_X)
Literals.insert(Src.second);
if (Literals.size() > 4)
@@ -1330,11 +1324,11 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
MIB->getOperand(getOperandIdx(Opcode, R600::OpName::pred_sel))
.setReg(MO.getReg());
- for (unsigned i = 0; i < 14; i++) {
+ for (unsigned Operand : Operands) {
MachineOperand &MO = MI->getOperand(
- getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
+ getOperandIdx(MI->getOpcode(), getSlotedOps(Operand, Slot)));
assert (MO.isImm());
- setImmOperand(*MIB, Operands[i], MO.getImm());
+ setImmOperand(*MIB, Operand, MO.getImm());
}
MIB->getOperand(20).setImm(0);
return MIB;
diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 6aee2f591b56..d26879ed8d60 100644
--- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -328,9 +328,9 @@ SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
void R600SchedStrategy::LoadAlu() {
std::vector<SUnit *> &QSrc = Pending[IDAlu];
- for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
- AluKind AK = getAluKind(QSrc[i]);
- AvailableAlus[AK].push_back(QSrc[i]);
+ for (SUnit *SU : QSrc) {
+ AluKind AK = getAluKind(SU);
+ AvailableAlus[AK].push_back(SU);
}
QSrc.clear();
}
diff --git a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
index ac6a3581e255..aa156190b7ae 100644
--- a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
@@ -307,8 +307,8 @@ class R600OpenCLImageTypeLoweringPass : public ModulePass {
// Build new MDNode.
SmallVector<Metadata *, 6> KernelMDArgs;
KernelMDArgs.push_back(ConstantAsMetadata::get(NewF));
- for (unsigned i = 0; i < NumKernelArgMDNodes; ++i)
- KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i]));
+ for (const MDVector &MDV : NewArgMDs.ArgVector)
+ KernelMDArgs.push_back(MDNode::get(*Context, MDV));
MDNode *NewMDNode = MDNode::get(*Context, KernelMDArgs);
return std::make_tuple(NewF, NewMDNode);
diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 72cf48c04e7f..795bc898a7bf 100644
--- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -150,19 +150,18 @@ bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
const {
unsigned CurrentUndexIdx = 0;
- for (DenseMap<Register, unsigned>::iterator It = ToMerge->RegToChan.begin(),
- E = ToMerge->RegToChan.end(); It != E; ++It) {
+ for (auto &It : ToMerge->RegToChan) {
DenseMap<Register, unsigned>::const_iterator PosInUntouched =
- Untouched->RegToChan.find((*It).first);
+ Untouched->RegToChan.find(It.first);
if (PosInUntouched != Untouched->RegToChan.end()) {
- Remap.push_back(std::pair<unsigned, unsigned>
- ((*It).second, (*PosInUntouched).second));
+ Remap.push_back(
+ std::pair<unsigned, unsigned>(It.second, (*PosInUntouched).second));
continue;
}
if (CurrentUndexIdx >= Untouched->UndefReg.size())
return false;
- Remap.push_back(std::pair<unsigned, unsigned>
- ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
+ Remap.push_back(std::pair<unsigned, unsigned>(
+ It.second, Untouched->UndefReg[CurrentUndexIdx++]));
}
return true;
@@ -172,9 +171,9 @@ static
unsigned getReassignedChan(
const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
unsigned Chan) {
- for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
- if (RemapChan[j].first == Chan)
- return RemapChan[j].second;
+ for (const auto &J : RemapChan) {
+ if (J.first == Chan)
+ return J.second;
}
llvm_unreachable("Chan wasn't reassigned");
}
@@ -190,11 +189,10 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
Register SrcVec = BaseRSI->Instr->getOperand(0).getReg();
DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
std::vector<Register> UpdatedUndef = BaseRSI->UndefReg;
- for (DenseMap<Register, unsigned>::iterator It = RSI->RegToChan.begin(),
- E = RSI->RegToChan.end(); It != E; ++It) {
+ for (const auto &It : RSI->RegToChan) {
Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
- unsigned SubReg = (*It).first;
- unsigned Swizzle = (*It).second;
+ unsigned SubReg = It.first;
+ unsigned Swizzle = It.second;
unsigned Chan = getReassignedChan(RemapChan, Swizzle);
MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
@@ -234,14 +232,12 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
}
void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
- for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
- E = PreviousRegSeqByReg.end(); It != E; ++It) {
- std::vector<MachineInstr *> &MIs = (*It).second;
+ for (auto &It : PreviousRegSeqByReg) {
+ std::vector<MachineInstr *> &MIs = It.second;
MIs.erase(llvm::find(MIs, MI), MIs.end());
}
- for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
- E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
- std::vector<MachineInstr *> &MIs = (*It).second;
+ for (auto &It : PreviousRegSeqByUndefCount) {
+ std::vector<MachineInstr *> &MIs = It.second;
MIs.erase(llvm::find(MIs, MI), MIs.end());
}
}
@@ -255,9 +251,9 @@ void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
Offset = 3;
for (unsigned i = 0; i < 4; i++) {
unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
- for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
- if (RemapChan[j].first == Swizzle) {
- MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
+ for (const auto &J : RemapChan) {
+ if (J.first == Swizzle) {
+ MI.getOperand(i + Offset).setImm(J.second - 1);
break;
}
}
diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index beb0aad86e89..fbe2a1cd9fba 100644
--- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -127,8 +127,8 @@ private:
R600::OpName::src1,
R600::OpName::src2
};
- for (unsigned i = 0; i < 3; i++) {
- int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
+ for (unsigned Op : Ops) {
+ int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Op);
if (OperandIdx < 0)
continue;
Register Src = MI.getOperand(OperandIdx).getReg();
diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index 99a1a8e9871a..c329bae50f92 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -54,10 +54,8 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, R600::PRED_SEL_ONE);
reserveRegisterTuples(Reserved, R600::INDIRECT_BASE_ADDR);
- for (TargetRegisterClass::iterator I = R600::R600_AddrRegClass.begin(),
- E = R600::R600_AddrRegClass.end(); I != E; ++I) {
- reserveRegisterTuples(Reserved, *I);
- }
+ for (MCPhysReg R : R600::R600_AddrRegClass)
+ reserveRegisterTuples(Reserved, R);
TII->reserveIndirectRegisters(Reserved, MF, *this);
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 200e00ee5521..1f93284fc7ee 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1620,7 +1620,7 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
// Erase the REG_SEQUENCE eagerly, unless we followed a chain of COPY users,
// in which case we can erase them all later in runOnMachineFunction.
if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParent();
return true;
}
@@ -1821,7 +1821,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
auto &SrcOp = InstToErase->getOperand(1);
auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
- InstToErase->eraseFromParentAndMarkDBGValuesForRemoval();
+ InstToErase->eraseFromParent();
InstToErase = nullptr;
if (!SrcReg || SrcReg.isPhysical())
break;
@@ -1831,7 +1831,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
}
if (InstToErase && InstToErase->isRegSequence() &&
MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg()))
- InstToErase->eraseFromParentAndMarkDBGValuesForRemoval();
+ InstToErase->eraseFromParent();
}
}
return true;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4706c74be721..d4fe74ecb96e 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1167,11 +1167,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (SpillVGPRToAGPR) {
// To track the spill frame indices handled in this pass.
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
+ BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
bool SeenDbgInstr = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ int FrameIndex;
if (MI.isDebugInstr())
SeenDbgInstr = true;
@@ -1191,10 +1193,18 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
SpillFIs.set(FI);
continue;
}
- }
+ } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
+ TII->isLoadFromStackSlot(MI, FrameIndex))
+ NonVGPRSpillFIs.set(FrameIndex);
}
}
+ // Stack slot coloring may assign different objets to the same stack slot.
+ // If not, then the VGPR to AGPR spill slot is dead.
+ for (unsigned FI : SpillFIs.set_bits())
+ if (!NonVGPRSpillFIs.test(FI))
+ FuncInfo->setVGPRToAGPRSpillDead(FI);
+
for (MachineBasicBlock &MBB : MF) {
for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
MBB.addLiveIn(Reg);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 35b72f5d201b..9f138136e6e9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -2062,33 +2063,30 @@ void SITargetLowering::allocateSpecialInputSGPRs(
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
- // We need to allocate these in place regardless of their use.
- const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;
-
// TODO: Unify handling with private memory pointers.
- if (IsFixed || Info.hasDispatchPtr())
+ if (Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
- if (IsFixed || Info.hasQueuePtr())
+ if (Info.hasQueuePtr())
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
- if (IsFixed || Info.hasImplicitArgPtr())
+ if (Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
- if (IsFixed || Info.hasDispatchID())
+ if (Info.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
- if (IsFixed || Info.hasWorkGroupIDX())
+ if (Info.hasWorkGroupIDX())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
- if (IsFixed || Info.hasWorkGroupIDY())
+ if (Info.hasWorkGroupIDY())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
- if (IsFixed || Info.hasWorkGroupIDZ())
+ if (Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
}
@@ -2419,10 +2417,9 @@ SDValue SITargetLowering::LowerFormalArguments(
if (IsEntryFunc) {
allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
- } else {
+ } else if (!IsGraphics) {
// For the fixed ABI, pass workitem IDs in the last argument register.
- if (AMDGPUTargetMachine::EnableFixedFunctionABI)
- allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
+ allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
if (IsKernel) {
@@ -2549,17 +2546,13 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
- if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
- // Special inputs come after user arguments.
- allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
- }
-
// Start adding system SGPRs.
if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
- allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
+ if (!IsGraphics)
+ allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
auto &ArgUsageInfo =
@@ -3123,8 +3116,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
- if (AMDGPUTargetMachine::EnableFixedFunctionABI &&
- CallConv != CallingConv::AMDGPU_Gfx) {
+ if (CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
@@ -3263,12 +3255,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
- if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
- CallConv != CallingConv::AMDGPU_Gfx) {
- // Copy special input registers after user input arguments.
- passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
- }
-
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
@@ -6282,10 +6268,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
}
- // Push back extra arguments.
- for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++)
- VAddrs.push_back(Op.getOperand(ArgOffset + I));
-
// Check for 16 bit addresses or derivatives and pack if true.
MVT VAddrVT =
Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
@@ -6298,6 +6280,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
+ // Push back extra arguments.
+ for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {
+ if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {
+ // Special handling of bias when A16 is on. Bias is of type half but
+ // occupies full 32-bit.
+ SDValue bias = DAG.getBuildVector( MVT::v2f16, DL, {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
+ VAddrs.push_back(bias);
+ } else
+ VAddrs.push_back(Op.getOperand(ArgOffset + I));
+ }
+
if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) {
// 16 bit gradients are supported, but are tied to the A16 control
// so both gradients and addresses must be 16 bit
@@ -7502,8 +7495,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
assert(NodePtr.getValueType() == MVT::i32 ||
NodePtr.getValueType() == MVT::i64);
- assert(RayDir.getValueType() == MVT::v4f16 ||
- RayDir.getValueType() == MVT::v4f32);
+ assert(RayDir.getValueType() == MVT::v3f16 ||
+ RayDir.getValueType() == MVT::v3f32);
if (!Subtarget->hasGFX10_AEncoding()) {
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
@@ -9837,11 +9830,13 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
if (Opcode == AMDGPU::G_FCANONICALIZE)
return true;
- if (Opcode == AMDGPU::G_FCONSTANT) {
- auto F = MI->getOperand(1).getFPImm()->getValueAPF();
- if (F.isNaN() && F.isSignaling())
+ Optional<FPValueAndVReg> FCR;
+ // Constant splat (can be padded with undef) or scalar constant.
+ if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) {
+ if (FCR->Value.isSignaling())
return false;
- return !F.isDenormal() || denormalsEnabledForType(MRI.getType(Reg), MF);
+ return !FCR->Value.isDenormal() ||
+ denormalsEnabledForType(MRI.getType(FCR->VReg), MF);
}
if (MaxDepth == 0)
@@ -11514,7 +11509,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// Prefer VGPRs over AGPRs in mAI instructions where possible.
// This saves a chain-copy of registers and better ballance register
// use between vgpr and agpr as agpr tuples tend to be big.
- if (const MCOperandInfo *OpInfo = MI.getDesc().OpInfo) {
+ if (MI.getDesc().OpInfo) {
unsigned Opc = MI.getOpcode();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
for (auto I : { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
@@ -12477,6 +12472,6 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
if (Size <= 256)
return Cost;
- Cost.first = (Size + 255) / 256;
+ Cost.first += (Size + 255) / 256;
return Cost;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c9d9dd1fb82c..6fbe5d45ce0a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -30,6 +30,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/DebugCounter.h"
@@ -51,26 +52,6 @@ static cl::opt<bool> ForceEmitZeroFlag(
cl::init(false), cl::Hidden);
namespace {
-
-template <typename EnumT>
-class enum_iterator
- : public iterator_facade_base<enum_iterator<EnumT>,
- std::forward_iterator_tag, const EnumT> {
- EnumT Value;
-public:
- enum_iterator() = default;
- enum_iterator(EnumT Value) : Value(Value) {}
-
- enum_iterator &operator++() {
- Value = static_cast<EnumT>(Value + 1);
- return *this;
- }
-
- bool operator==(const enum_iterator &RHS) const { return Value == RHS.Value; }
-
- EnumT operator*() const { return Value; }
-};
-
// Class of object that encapsulates latest instruction counter score
// associated with the operand. Used for determining whether
// s_waitcnt instruction needs to be emitted.
@@ -78,27 +59,32 @@ public:
#define CNT_MASK(t) (1u << (t))
enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, VS_CNT, NUM_INST_CNTS };
+} // namespace
-iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
- return make_range(enum_iterator<InstCounterType>(VM_CNT),
- enum_iterator<InstCounterType>(NUM_INST_CNTS));
-}
+namespace llvm {
+template <> struct enum_iteration_traits<InstCounterType> {
+ static constexpr bool is_iterable = true;
+};
+} // namespace llvm
+
+namespace {
+auto inst_counter_types() { return enum_seq(VM_CNT, NUM_INST_CNTS); }
using RegInterval = std::pair<int, int>;
-struct {
+struct HardwareLimits {
unsigned VmcntMax;
unsigned ExpcntMax;
unsigned LgkmcntMax;
unsigned VscntMax;
-} HardwareLimits;
+};
-struct {
+struct RegisterEncoding {
unsigned VGPR0;
unsigned VGPRL;
unsigned SGPR0;
unsigned SGPRL;
-} RegisterEncoding;
+};
enum WaitEventType {
VMEM_ACCESS, // vector-memory read & write
@@ -194,18 +180,20 @@ void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
// "s_waitcnt 0" before use.
class WaitcntBrackets {
public:
- WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {}
+ WaitcntBrackets(const GCNSubtarget *SubTarget, HardwareLimits Limits,
+ RegisterEncoding Encoding)
+ : ST(SubTarget), Limits(Limits), Encoding(Encoding) {}
- static unsigned getWaitCountMax(InstCounterType T) {
+ unsigned getWaitCountMax(InstCounterType T) const {
switch (T) {
case VM_CNT:
- return HardwareLimits.VmcntMax;
+ return Limits.VmcntMax;
case LGKM_CNT:
- return HardwareLimits.LgkmcntMax;
+ return Limits.LgkmcntMax;
case EXP_CNT:
- return HardwareLimits.ExpcntMax;
+ return Limits.ExpcntMax;
case VS_CNT:
- return HardwareLimits.VscntMax;
+ return Limits.VscntMax;
default:
break;
}
@@ -338,6 +326,8 @@ private:
unsigned OpNo, unsigned Val);
const GCNSubtarget *ST = nullptr;
+ HardwareLimits Limits = {};
+ RegisterEncoding Encoding = {};
unsigned ScoreLBs[NUM_INST_CNTS] = {0};
unsigned ScoreUBs[NUM_INST_CNTS] = {0};
unsigned PendingEvents = 0;
@@ -471,14 +461,14 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST));
if (TRI->isVectorRegister(*MRI, Op.getReg())) {
- assert(Reg >= RegisterEncoding.VGPR0 && Reg <= RegisterEncoding.VGPRL);
- Result.first = Reg - RegisterEncoding.VGPR0;
+ assert(Reg >= Encoding.VGPR0 && Reg <= Encoding.VGPRL);
+ Result.first = Reg - Encoding.VGPR0;
if (TRI->isAGPR(*MRI, Op.getReg()))
Result.first += AGPR_OFFSET;
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
} else if (TRI->isSGPRReg(*MRI, Op.getReg())) {
- assert(Reg >= RegisterEncoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS);
- Result.first = Reg - RegisterEncoding.SGPR0 + NUM_ALL_VGPRS;
+ assert(Reg >= Encoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS);
+ Result.first = Reg - Encoding.SGPR0 + NUM_ALL_VGPRS;
assert(Result.first >= NUM_ALL_VGPRS &&
Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
}
@@ -1589,20 +1579,22 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
for (auto T : inst_counter_types())
ForceEmitWaitcnt[T] = false;
- HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
- HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
- HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
- HardwareLimits.VscntMax = ST->hasVscnt() ? 63 : 0;
+ HardwareLimits Limits = {};
+ Limits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
+ Limits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
+ Limits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
+ Limits.VscntMax = ST->hasVscnt() ? 63 : 0;
unsigned NumVGPRsMax = ST->getAddressableNumVGPRs();
unsigned NumSGPRsMax = ST->getAddressableNumSGPRs();
assert(NumVGPRsMax <= SQ_MAX_PGM_VGPRS);
assert(NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
- RegisterEncoding.VGPR0 = TRI->getEncodingValue(AMDGPU::VGPR0);
- RegisterEncoding.VGPRL = RegisterEncoding.VGPR0 + NumVGPRsMax - 1;
- RegisterEncoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0);
- RegisterEncoding.SGPRL = RegisterEncoding.SGPR0 + NumSGPRsMax - 1;
+ RegisterEncoding Encoding = {};
+ Encoding.VGPR0 = TRI->getEncodingValue(AMDGPU::VGPR0);
+ Encoding.VGPRL = Encoding.VGPR0 + NumVGPRsMax - 1;
+ Encoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0);
+ Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax - 1;
TrackedWaitcntSet.clear();
BlockInfos.clear();
@@ -1652,9 +1644,9 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
*Brackets = *BI.Incoming;
} else {
if (!Brackets)
- Brackets = std::make_unique<WaitcntBrackets>(ST);
+ Brackets = std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
else
- *Brackets = WaitcntBrackets(ST);
+ *Brackets = WaitcntBrackets(ST, Limits, Encoding);
}
Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
@@ -1686,45 +1678,47 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
} while (Repeat);
- SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
-
- bool HaveScalarStores = false;
+ if (ST->hasScalarStores()) {
+ SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+ bool HaveScalarStores = false;
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (!HaveScalarStores && TII->isScalarStore(MI))
- HaveScalarStores = true;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!HaveScalarStores && TII->isScalarStore(MI))
+ HaveScalarStores = true;
- if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
- MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
- EndPgmBlocks.push_back(&MBB);
+ if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+ EndPgmBlocks.push_back(&MBB);
+ }
}
- }
- if (HaveScalarStores) {
- // If scalar writes are used, the cache must be flushed or else the next
- // wave to reuse the same scratch memory can be clobbered.
- //
- // Insert s_dcache_wb at wave termination points if there were any scalar
- // stores, and only if the cache hasn't already been flushed. This could be
- // improved by looking across blocks for flushes in postdominating blocks
- // from the stores but an explicitly requested flush is probably very rare.
- for (MachineBasicBlock *MBB : EndPgmBlocks) {
- bool SeenDCacheWB = false;
+ if (HaveScalarStores) {
+ // If scalar writes are used, the cache must be flushed or else the next
+ // wave to reuse the same scratch memory can be clobbered.
+ //
+ // Insert s_dcache_wb at wave termination points if there were any scalar
+ // stores, and only if the cache hasn't already been flushed. This could
+ // be improved by looking across blocks for flushes in postdominating
+ // blocks from the stores but an explicitly requested flush is probably
+ // very rare.
+ for (MachineBasicBlock *MBB : EndPgmBlocks) {
+ bool SeenDCacheWB = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
- SeenDCacheWB = true;
- else if (TII->isScalarStore(*I))
- SeenDCacheWB = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+ SeenDCacheWB = true;
+ else if (TII->isScalarStore(*I))
+ SeenDCacheWB = false;
- // FIXME: It would be better to insert this before a waitcnt if any.
- if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
- !SeenDCacheWB) {
- Modified = true;
- BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ // FIXME: It would be better to insert this before a waitcnt if any.
+ if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
+ !SeenDCacheWB) {
+ Modified = true;
+ BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ }
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 92f5322b8ad2..1755b93538ce 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -899,8 +899,12 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.isAGPRClass(RC)) {
- Opcode = (RI.hasVGPRs(SrcRC)) ?
- AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
+ if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
+ Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
+ else if (RI.hasVGPRs(SrcRC))
+ Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
+ else
+ Opcode = AMDGPU::INSTRUCTION_LIST_END;
} else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
} else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
@@ -1417,6 +1421,33 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
}
}
+static unsigned getAVSpillSaveOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_AV32_SAVE;
+ case 8:
+ return AMDGPU::SI_SPILL_AV64_SAVE;
+ case 12:
+ return AMDGPU::SI_SPILL_AV96_SAVE;
+ case 16:
+ return AMDGPU::SI_SPILL_AV128_SAVE;
+ case 20:
+ return AMDGPU::SI_SPILL_AV160_SAVE;
+ case 24:
+ return AMDGPU::SI_SPILL_AV192_SAVE;
+ case 28:
+ return AMDGPU::SI_SPILL_AV224_SAVE;
+ case 32:
+ return AMDGPU::SI_SPILL_AV256_SAVE;
+ case 64:
+ return AMDGPU::SI_SPILL_AV512_SAVE;
+ case 128:
+ return AMDGPU::SI_SPILL_AV1024_SAVE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
Register SrcReg, bool isKill,
@@ -1463,21 +1494,11 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
- : getVGPRSpillSaveOpcode(SpillSize);
+ unsigned Opcode = RI.isVectorSuperClass(RC) ? getAVSpillSaveOpcode(SpillSize)
+ : RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+ : getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
- if (RI.isVectorSuperClass(RC)) {
- // Convert an AV spill into a VGPR spill. Introduce a copy from AV to an
- // equivalent VGPR register beforehand. Regalloc might want to introduce
- // AV spills only to be relevant until rewriter at which they become
- // either spills of VGPRs or AGPRs.
- Register TmpVReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC));
- BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpVReg)
- .addReg(SrcReg, RegState::Kill);
- SrcReg = TmpVReg;
- }
-
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
@@ -1567,6 +1588,33 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
}
}
+static unsigned getAVSpillRestoreOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_AV32_RESTORE;
+ case 8:
+ return AMDGPU::SI_SPILL_AV64_RESTORE;
+ case 12:
+ return AMDGPU::SI_SPILL_AV96_RESTORE;
+ case 16:
+ return AMDGPU::SI_SPILL_AV128_RESTORE;
+ case 20:
+ return AMDGPU::SI_SPILL_AV160_RESTORE;
+ case 24:
+ return AMDGPU::SI_SPILL_AV192_RESTORE;
+ case 28:
+ return AMDGPU::SI_SPILL_AV224_RESTORE;
+ case 32:
+ return AMDGPU::SI_SPILL_AV256_RESTORE;
+ case 64:
+ return AMDGPU::SI_SPILL_AV512_RESTORE;
+ case 128:
+ return AMDGPU::SI_SPILL_AV1024_RESTORE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
Register DestReg, int FrameIndex,
@@ -1609,26 +1657,15 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
- : getVGPRSpillRestoreOpcode(SpillSize);
-
- bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
- Register TmpReg = DestReg;
- if (IsVectorSuperClass) {
- // For AV classes, insert the spill restore to a VGPR followed by a copy
- // into an equivalent AV register.
- MachineRegisterInfo &MRI = MF->getRegInfo();
- DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC));
- }
+ unsigned Opcode = RI.isVectorSuperClass(RC)
+ ? getAVSpillRestoreOpcode(SpillSize)
+ : RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+ : getVGPRSpillRestoreOpcode(SpillSize);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex) // vaddr
- .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
-
- if (IsVectorSuperClass)
- BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpReg)
- .addReg(DestReg, RegState::Kill);
+ .addFrameIndex(FrameIndex) // vaddr
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
@@ -2358,8 +2395,6 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx));
auto *ShAmt = MCConstantExpr::create(32, MCCtx);
OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx));
-
- return;
}
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
@@ -3106,23 +3141,26 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
}
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
- int64_t &Imm) {
+ int64_t &Imm, MachineInstr **DefMI = nullptr) {
if (Reg.isPhysical())
return false;
auto *Def = MRI.getUniqueVRegDef(Reg);
if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
Imm = Def->getOperand(1).getImm();
+ if (DefMI)
+ *DefMI = Def;
return true;
}
return false;
}
-static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm) {
+static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
+ MachineInstr **DefMI = nullptr) {
if (!MO->isReg())
return false;
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
- return getFoldableImm(MO->getReg(), MRI, Imm);
+ return getFoldableImm(MO->getReg(), MRI, Imm, DefMI);
}
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
@@ -3195,8 +3233,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
+ MachineInstr *DefMI;
+ const auto killDef = [&DefMI, &MBB, this]() -> void {
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ // The only user is the instruction which will be killed.
+ if (!MRI.hasOneNonDBGUse(DefMI->getOperand(0).getReg()))
+ return;
+ // We cannot just remove the DefMI here, calling pass will crash.
+ DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+ for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
+ DefMI->RemoveOperand(I);
+ };
+
int64_t Imm;
- if (getFoldableImm(Src2, Imm)) {
+ if (getFoldableImm(Src2, Imm, &DefMI)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3209,13 +3259,14 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
unsigned NewOpc = IsFMA
? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
- if (getFoldableImm(Src1, Imm)) {
+ if (getFoldableImm(Src1, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
@@ -3225,10 +3276,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
- if (getFoldableImm(Src0, Imm)) {
+ if (getFoldableImm(Src0, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1 &&
isOperandLegal(
MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
@@ -3241,12 +3293,13 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef();
return MIB;
}
}
}
- unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_e64
+ unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
: IsF64 ? AMDGPU::V_FMA_F64_e64
: AMDGPU::V_FMA_F32_e64)
: (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64);
@@ -3605,12 +3658,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const {
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
// Can't shrink instruction with three operands.
- // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
- // a special case for it. It can only be shrunk if the third operand
- // is vcc, and src0_modifiers and src1_modifiers are not set.
- // We should handle this the same way we handle vopc, by addding
- // a register allocation hint pre-regalloc and then do the shrinking
- // post-regalloc.
if (Src2) {
switch (MI.getOpcode()) {
default: return false;
@@ -4563,8 +4610,9 @@ static unsigned adjustAllocatableRegClass(const GCNSubtarget &ST,
unsigned RCID,
bool IsAllocatable) {
if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
- (TID.mayLoad() || TID.mayStore() ||
- (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) {
+ (((TID.mayLoad() || TID.mayStore()) &&
+ !(TID.TSFlags & SIInstrFlags::VGPRSpill)) ||
+ (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) {
switch (RCID) {
case AMDGPU::AV_32RegClassID: return AMDGPU::VGPR_32RegClassID;
case AMDGPU::AV_64RegClassID: return AMDGPU::VReg_64RegClassID;
@@ -5001,8 +5049,7 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
--ConstantBusLimit;
}
- for (unsigned i = 0; i < 3; ++i) {
- int Idx = VOP3Idx[i];
+ for (int Idx : VOP3Idx) {
if (Idx == -1)
break;
MachineOperand &MO = MI.getOperand(Idx);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 47ee83eb9351..dda92d3d25ff 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1350,11 +1350,11 @@ def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
// Complex patterns
//===----------------------------------------------------------------------===//
-def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
-def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
-def DS128Bit8ByteAligned : ComplexPattern<i64, 3, "SelectDS128Bit8ByteAligned">;
+def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
+def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
+def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;
-def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
+def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d55d8da8699a..636337ede000 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -761,6 +761,17 @@ defm SI_SPILL_A256 : SI_SPILL_VGPR <AReg_256, 1>;
defm SI_SPILL_A512 : SI_SPILL_VGPR <AReg_512, 1>;
defm SI_SPILL_A1024 : SI_SPILL_VGPR <AReg_1024, 1>;
+defm SI_SPILL_AV32 : SI_SPILL_VGPR <AV_32, 1>;
+defm SI_SPILL_AV64 : SI_SPILL_VGPR <AV_64, 1>;
+defm SI_SPILL_AV96 : SI_SPILL_VGPR <AV_96, 1>;
+defm SI_SPILL_AV128 : SI_SPILL_VGPR <AV_128, 1>;
+defm SI_SPILL_AV160 : SI_SPILL_VGPR <AV_160, 1>;
+defm SI_SPILL_AV192 : SI_SPILL_VGPR <AV_192, 1>;
+defm SI_SPILL_AV224 : SI_SPILL_VGPR <AV_224, 1>;
+defm SI_SPILL_AV256 : SI_SPILL_VGPR <AV_256, 1>;
+defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
+defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
+
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
(ins si_ga:$ptr_lo, si_ga:$ptr_hi),
@@ -2106,6 +2117,19 @@ def : GCNPat <
} // end isWave32
def : GCNPat <
+ (i32 (DivergentBinFrag<xor> i32:$src0, (i32 -1))),
+ (V_NOT_B32_e32 $src0)
+>;
+
+def : GCNPat <
+ (i64 (DivergentBinFrag<xor> i64:$src0, (i64 -1))),
+ (REG_SEQUENCE VReg_64,
+ (V_NOT_B32_e32 (i32 (EXTRACT_SUBREG i64:$src0, sub0))), sub0,
+ (V_NOT_B32_e32 (i32 (EXTRACT_SUBREG i64:$src0, sub1))), sub1
+ )
+>;
+
+def : GCNPat <
(f16 (sint_to_fp i1:$src)),
(V_CVT_F16_F32_e32 (
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
@@ -2188,18 +2212,18 @@ def : GCNPat <
>;
def : GCNPat <
- (i1 (trunc i32:$a)),
- (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+ (i1 (DivergentUnaryFrag<trunc> i32:$a)),
+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
>;
def : GCNPat <
- (i1 (trunc i16:$a)),
- (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+ (i1 (DivergentUnaryFrag<trunc> i16:$a)),
+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
>;
def : GCNPat <
- (i1 (trunc i64:$a)),
- (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
+ (i1 (DivergentUnaryFrag<trunc> i64:$a)),
+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1),
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
>;
@@ -2405,21 +2429,37 @@ def : GCNPat <
// COPY is workaround tablegen bug from multiple outputs
// from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat <
- (v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 0), (i16 SReg_32:$src1))),
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
def : GCNPat <
- (v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))),
+ (v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 SReg_32:$src1))),
+ (v2i16 (V_LSHLREV_B32_e64 (i16 16), SReg_32:$src1))
+>;
+
+
+def : GCNPat <
+ (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
def : GCNPat <
- (v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (v2i16 (DivergentBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
+ (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src1))
+>;
+
+def : GCNPat <
+ (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
def : GCNPat <
+ (v2f16 (DivergentBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src1))
+>;
+
+def : GCNPat <
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
>;
@@ -2435,42 +2475,74 @@ def : GCNPat <
>;
def : GCNPat <
- (v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 undef), (i16 SReg_32:$src1))),
(S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
def : GCNPat <
- (v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))),
+ (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 SReg_32:$src1))),
+ (v2i16 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1))
+>;
+
+
+def : GCNPat <
+ (v2f16 (UniformBinFrag<build_vector> (f16 undef), (f16 SReg_32:$src1))),
(S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
+def : GCNPat <
+ (v2f16 (DivergentBinFrag<build_vector> (f16 undef), (f16 SReg_32:$src1))),
+ (v2f16 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1))
+>;
+
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
- (v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
+def : GCNPat <
+ (v2i16 (DivergentBinFrag<build_vector> (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
+ (v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
+>;
+
// With multiple uses of the shift, this will duplicate the shift and
// increase register pressure.
def : GCNPat <
- (v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
(v2i16 (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1))
>;
+def : GCNPat <
+ (v2i16 (DivergentBinFrag<build_vector> (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
+ (v2i16 (V_BFI_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src0, SReg_32:$src1))
+>;
+
def : GCNPat <
- (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))),
(i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
(S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
-// TODO: Should source modifiers be matched to v_pack_b32_f16?
def : GCNPat <
- (v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
+ (v2i16 (DivergentBinFrag<build_vector> (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))),
+ (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
+ (v2i16 (V_AND_OR_B32_e64 SReg_32:$src1, (i32 (V_MOV_B32_e32 (i32 0xffff0000))), (i32 (V_LSHRREV_B32_e64 (i32 16), SReg_32:$src0))))
+>;
+
+def : GCNPat <
+ (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
def : GCNPat <
+ (v2f16 (DivergentBinFrag<build_vector> (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
+ (v2f16 (V_LSHL_OR_B32_e64 SReg_32:$src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src0))))
+>;
+
+
+def : GCNPat <
(v2f16 (is_canonicalized<build_vector> (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)),
(f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))),
(V_PACK_B32_F16_e64 $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
@@ -2866,6 +2938,18 @@ def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}
+def G_AMDGPU_FMED3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_CLAMP : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
// Atomic cmpxchg. $cmpval ad $newval are packed in a single vector
// operand Expects a MachineMemOperand in addition to explicit
// operands.
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c4007f56f350..3ce368ef4db9 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -62,11 +62,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// calls.
const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
- // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
- // have any calls.
- const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
- CC != CallingConv::AMDGPU_Gfx &&
- (!isEntryFunction() || HasCalls);
const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
CC == CallingConv::SPIR_KERNEL;
@@ -80,7 +75,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
if (!isEntryFunction()) {
- if (UseFixedABI)
+ if (CC != CallingConv::AMDGPU_Gfx)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
// TODO: Pick a high register, and shift down, similar to a kernel.
@@ -110,20 +105,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
else if (ST.isMesaGfxShader(F))
ImplicitBufferPtr = true;
- if (UseFixedABI) {
- DispatchPtr = true;
- QueuePtr = true;
- ImplicitArgPtr = true;
- WorkGroupIDX = true;
- WorkGroupIDY = true;
- WorkGroupIDZ = true;
- WorkItemIDX = true;
- WorkItemIDY = true;
- WorkItemIDZ = true;
-
- // FIXME: We don't need this?
- DispatchID = true;
- } else if (!AMDGPU::isGraphics(CC)) {
+ if (!AMDGPU::isGraphics(CC)) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
@@ -462,7 +444,7 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
MFI.setStackID(i, TargetStackID::Default);
for (auto &R : VGPRToAGPRSpills) {
- if (R.second.FullyAllocated)
+ if (R.second.IsDead)
MFI.RemoveStackObject(R.first);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index c305bc20e40d..8accbf611c5f 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -465,6 +465,7 @@ public:
struct VGPRSpillToAGPR {
SmallVector<MCPhysReg, 32> Lanes;
bool FullyAllocated = false;
+ bool IsDead = false;
};
// Map WWM VGPR to a stack slot that is used to save/restore it in the
@@ -546,6 +547,12 @@ public:
: I->second.Lanes[Lane];
}
+ void setVGPRToAGPRSpillDead(int FrameIndex) {
+ auto I = VGPRToAGPRSpills.find(FrameIndex);
+ if (I != VGPRToAGPRSpills.end())
+ I->second.IsDead = true;
+ }
+
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 5590d84cc3ab..81db66a98ddf 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -869,29 +869,27 @@ void SIScheduleBlockCreator::colorComputeReservedDependencies() {
}
void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
- unsigned DAGSize = DAG->SUnits.size();
std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
// Every combination of colors given by the top down
// and bottom up Reserved node dependency
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[i];
+ for (const SUnit &SU : DAG->SUnits) {
std::pair<unsigned, unsigned> SUColors;
// High latency instructions: already given.
- if (CurrentColoring[SU->NodeNum])
+ if (CurrentColoring[SU.NodeNum])
continue;
- SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
- SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
+ SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];
+ SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];
std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
ColorCombinations.find(SUColors);
if (Pos != ColorCombinations.end()) {
- CurrentColoring[SU->NodeNum] = Pos->second;
+ CurrentColoring[SU.NodeNum] = Pos->second;
} else {
- CurrentColoring[SU->NodeNum] = NextNonReservedID;
+ CurrentColoring[SU.NodeNum] = NextNonReservedID;
ColorCombinations[SUColors] = NextNonReservedID++;
}
}
@@ -1232,15 +1230,13 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
}
// Free root and leafs of all blocks to enable scheduling inside them.
- for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
- SIScheduleBlock *Block = CurrentBlocks[i];
+ for (SIScheduleBlock *Block : CurrentBlocks)
Block->finalizeUnits();
- }
- LLVM_DEBUG(dbgs() << "Blocks created:\n\n";
- for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
- SIScheduleBlock *Block = CurrentBlocks[i];
- Block->printDebug(true);
- });
+ LLVM_DEBUG({
+ dbgs() << "Blocks created:\n\n";
+ for (SIScheduleBlock *Block : CurrentBlocks)
+ Block->printDebug(true);
+ });
}
// Two functions taken from Codegen/MachineScheduler.cpp
@@ -1379,9 +1375,9 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() {
}
}
- LLVM_DEBUG(for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
- SIScheduleBlock *Block = CurrentBlocks[i];
- Block->printDebug(true);
+ LLVM_DEBUG({
+ for (SIScheduleBlock *Block : CurrentBlocks)
+ Block->printDebug(true);
});
}
@@ -1437,8 +1433,7 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
// found for several parents, we increment the usage of the one with the
// highest topological index.
LiveOutRegsNumUsages.resize(Blocks.size());
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- SIScheduleBlock *Block = Blocks[i];
+ for (SIScheduleBlock *Block : Blocks) {
for (unsigned Reg : Block->getInRegs()) {
bool Found = false;
int topoInd = -1;
@@ -1502,8 +1497,7 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
// Fill LiveRegsConsumers for regs that were already
// defined before scheduling.
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- SIScheduleBlock *Block = Blocks[i];
+ for (SIScheduleBlock *Block : Blocks) {
for (unsigned Reg : Block->getInRegs()) {
bool Found = false;
for (SIScheduleBlock* Pred: Block->getPreds()) {
@@ -1700,10 +1694,7 @@ void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
decreaseLiveRegs(Block, Block->getInRegs());
addLiveRegs(Block->getOutRegs());
releaseBlockSuccs(Block);
- for (std::map<unsigned, unsigned>::iterator RegI =
- LiveOutRegsNumUsages[Block->getID()].begin(),
- E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
- std::pair<unsigned, unsigned> RegP = *RegI;
+ for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {
// We produce this register, thus it must not be previously alive.
assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
LiveRegsConsumers[RegP.first] == 0);
@@ -1759,8 +1750,7 @@ SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
ScheduledBlocks = Scheduler.getBlocks();
- for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
- SIScheduleBlock *Block = ScheduledBlocks[b];
+ for (SIScheduleBlock *Block : ScheduledBlocks) {
std::vector<SUnit*> SUs = Block->getScheduledUnits();
for (SUnit* SU : SUs)
@@ -2000,9 +1990,8 @@ void SIScheduleDAGMI::schedule()
assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
TopRPTracker.setPos(CurrentTop);
- for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
- E = ScheduledSUnits.end(); I != E; ++I) {
- SUnit *SU = &SUnits[*I];
+ for (unsigned I : ScheduledSUnits) {
+ SUnit *SU = &SUnits[I];
scheduleMI(SU, true);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a1d9a23a5084..21aed4ececb5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -210,6 +210,7 @@ struct SGPRSpillBuilder {
auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
if (!TmpVGPRLive)
I.addReg(TmpVGPR, RegState::ImplicitDefine);
+ I->getOperand(2).setIsDead(true); // Mark SCC as dead.
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
}
}
@@ -242,9 +243,10 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
/*IsKill*/ false);
auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
- if (!TmpVGPRLive) {
+ if (!TmpVGPRLive)
I.addReg(TmpVGPR, RegState::ImplicitKill);
- }
+ I->getOperand(2).setIsDead(true); // Mark SCC as dead.
+
// Restore active lanes
if (TmpVGPRLive)
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
@@ -267,9 +269,11 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
/*IsKill*/ false);
// Spill inactive lanes
- BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
- BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
}
}
@@ -908,6 +912,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V1024_RESTORE:
case AMDGPU::SI_SPILL_A1024_SAVE:
case AMDGPU::SI_SPILL_A1024_RESTORE:
+ case AMDGPU::SI_SPILL_AV1024_SAVE:
+ case AMDGPU::SI_SPILL_AV1024_RESTORE:
return 32;
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S512_RESTORE:
@@ -915,6 +921,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V512_RESTORE:
case AMDGPU::SI_SPILL_A512_SAVE:
case AMDGPU::SI_SPILL_A512_RESTORE:
+ case AMDGPU::SI_SPILL_AV512_SAVE:
+ case AMDGPU::SI_SPILL_AV512_RESTORE:
return 16;
case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S256_RESTORE:
@@ -922,6 +930,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_A256_SAVE:
case AMDGPU::SI_SPILL_A256_RESTORE:
+ case AMDGPU::SI_SPILL_AV256_SAVE:
+ case AMDGPU::SI_SPILL_AV256_RESTORE:
return 8;
case AMDGPU::SI_SPILL_S224_SAVE:
case AMDGPU::SI_SPILL_S224_RESTORE:
@@ -929,6 +939,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V224_RESTORE:
case AMDGPU::SI_SPILL_A224_SAVE:
case AMDGPU::SI_SPILL_A224_RESTORE:
+ case AMDGPU::SI_SPILL_AV224_SAVE:
+ case AMDGPU::SI_SPILL_AV224_RESTORE:
return 7;
case AMDGPU::SI_SPILL_S192_SAVE:
case AMDGPU::SI_SPILL_S192_RESTORE:
@@ -936,6 +948,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V192_RESTORE:
case AMDGPU::SI_SPILL_A192_SAVE:
case AMDGPU::SI_SPILL_A192_RESTORE:
+ case AMDGPU::SI_SPILL_AV192_SAVE:
+ case AMDGPU::SI_SPILL_AV192_RESTORE:
return 6;
case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S160_RESTORE:
@@ -943,6 +957,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V160_RESTORE:
case AMDGPU::SI_SPILL_A160_SAVE:
case AMDGPU::SI_SPILL_A160_RESTORE:
+ case AMDGPU::SI_SPILL_AV160_SAVE:
+ case AMDGPU::SI_SPILL_AV160_RESTORE:
return 5;
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S128_RESTORE:
@@ -950,6 +966,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_A128_SAVE:
case AMDGPU::SI_SPILL_A128_RESTORE:
+ case AMDGPU::SI_SPILL_AV128_SAVE:
+ case AMDGPU::SI_SPILL_AV128_RESTORE:
return 4;
case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S96_RESTORE:
@@ -957,6 +975,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_A96_SAVE:
case AMDGPU::SI_SPILL_A96_RESTORE:
+ case AMDGPU::SI_SPILL_AV96_SAVE:
+ case AMDGPU::SI_SPILL_AV96_RESTORE:
return 3;
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -964,6 +984,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_A64_SAVE:
case AMDGPU::SI_SPILL_A64_RESTORE:
+ case AMDGPU::SI_SPILL_AV64_SAVE:
+ case AMDGPU::SI_SPILL_AV64_RESTORE:
return 2;
case AMDGPU::SI_SPILL_S32_SAVE:
case AMDGPU::SI_SPILL_S32_RESTORE:
@@ -971,6 +993,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::SI_SPILL_A32_SAVE:
case AMDGPU::SI_SPILL_A32_RESTORE:
+ case AMDGPU::SI_SPILL_AV32_SAVE:
+ case AMDGPU::SI_SPILL_AV32_RESTORE:
return 1;
default: llvm_unreachable("Invalid spill opcode");
}
@@ -1240,9 +1264,10 @@ void SIRegisterInfo::buildSpillLoadStore(
if (ScratchOffsetReg == AMDGPU::NoRegister) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
} else {
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
+ auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
.addReg(ScratchOffsetReg)
.addImm(Offset);
+ Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
Offset = 0;
@@ -1810,7 +1835,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_A128_SAVE:
case AMDGPU::SI_SPILL_A96_SAVE:
case AMDGPU::SI_SPILL_A64_SAVE:
- case AMDGPU::SI_SPILL_A32_SAVE: {
+ case AMDGPU::SI_SPILL_A32_SAVE:
+ case AMDGPU::SI_SPILL_AV1024_SAVE:
+ case AMDGPU::SI_SPILL_AV512_SAVE:
+ case AMDGPU::SI_SPILL_AV256_SAVE:
+ case AMDGPU::SI_SPILL_AV224_SAVE:
+ case AMDGPU::SI_SPILL_AV192_SAVE:
+ case AMDGPU::SI_SPILL_AV160_SAVE:
+ case AMDGPU::SI_SPILL_AV128_SAVE:
+ case AMDGPU::SI_SPILL_AV96_SAVE:
+ case AMDGPU::SI_SPILL_AV64_SAVE:
+ case AMDGPU::SI_SPILL_AV32_SAVE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -1846,7 +1881,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_A224_RESTORE:
case AMDGPU::SI_SPILL_A256_RESTORE:
case AMDGPU::SI_SPILL_A512_RESTORE:
- case AMDGPU::SI_SPILL_A1024_RESTORE: {
+ case AMDGPU::SI_SPILL_A1024_RESTORE:
+ case AMDGPU::SI_SPILL_AV32_RESTORE:
+ case AMDGPU::SI_SPILL_AV64_RESTORE:
+ case AMDGPU::SI_SPILL_AV96_RESTORE:
+ case AMDGPU::SI_SPILL_AV128_RESTORE:
+ case AMDGPU::SI_SPILL_AV160_RESTORE:
+ case AMDGPU::SI_SPILL_AV192_RESTORE:
+ case AMDGPU::SI_SPILL_AV224_RESTORE:
+ case AMDGPU::SI_SPILL_AV256_RESTORE:
+ case AMDGPU::SI_SPILL_AV512_RESTORE:
+ case AMDGPU::SI_SPILL_AV1024_RESTORE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 3a372d4519fb..c8f1daf26de9 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -731,11 +731,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // getVOPe32 could be -1 here if we started with an instruction that had
- // a 32-bit encoding and then commuted it to an instruction that did not.
- if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
- continue;
-
int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
if (TII->isVOPC(Op32)) {
@@ -776,10 +771,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand *SDst = TII->getNamedOperand(MI,
AMDGPU::OpName::sdst);
- // Check the carry-in operand for v_addc_u32_e64.
- const MachineOperand *Src2 = TII->getNamedOperand(MI,
- AMDGPU::OpName::src2);
-
if (SDst) {
bool Next = false;
@@ -791,6 +782,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// All of the instructions with carry outs also have an SGPR input in
// src2.
+ const MachineOperand *Src2 = TII->getNamedOperand(MI,
+ AMDGPU::OpName::src2);
if (Src2 && Src2->getReg() != VCCReg) {
if (Src2->getReg().isVirtual())
MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 46012e5d7d97..77ee3c0ff0e4 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -495,11 +495,10 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
// instruction as needing e.g. WQM before visiting it and realizing it needs
// WQM disabled.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
- for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
- MachineBasicBlock &MBB = **BI;
- BlockInfo &BBI = Blocks[&MBB];
+ for (MachineBasicBlock *MBB : RPOT) {
+ BlockInfo &BBI = Blocks[MBB];
- for (MachineInstr &MI : MBB) {
+ for (MachineInstr &MI : *MBB) {
InstrInfo &III = Instructions[&MI];
unsigned Opcode = MI.getOpcode();
char Flags = 0;
@@ -561,7 +560,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
BBI.Needs |= StateExact;
if (!(BBI.InNeeds & StateExact)) {
BBI.InNeeds |= StateExact;
- Worklist.push_back(&MBB);
+ Worklist.push_back(MBB);
}
GlobalFlags |= StateExact;
III.Disabled = StateWQM | StateStrict;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 8502ed61b366..184c871db775 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -181,15 +181,8 @@ class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pse
" $sdst", [(set i64:$sdst, (node))]> {
let hasSideEffects = 1;
- // FIXME: This should be definitively mayStore = 0. TableGen
- // brokenly tries to infer these based on the intrinsic properties
- // corresponding to the IR attributes. The target intrinsics are
- // considered as writing to memory for IR dependency purposes, but
- // those can be modeled with hasSideEffects here. These also end up
- // inferring differently for llvm.readcyclecounter and the amdgcn
- // intrinsics.
- let mayStore = ?;
- let mayLoad = 1;
+ let mayStore = 0;
+ let mayLoad = 0;
let has_sbase = 0;
let has_offset = 0;
}
@@ -765,11 +758,11 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL
}];
}
-def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
-def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
-def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
-def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
-def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
multiclass SMRD_Pattern <string Instr, ValueType vt> {
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 61ecc13620a1..1713586dcf5b 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -157,6 +157,42 @@ class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> :
let has_sdst = 0;
}
+class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0),
+ (Op $src0),
+ [{ return !N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
+
+class UniformBinFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0, node:$src1),
+ (Op $src0, $src1),
+ [{ return !N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
+
+class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0, node:$src1),
+ (Op $src0, $src1),
+ [{ return N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
+
let isMoveImm = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -172,11 +208,11 @@ let isMoveImm = 1 in {
let Defs = [SCC] in {
def S_NOT_B32 : SOP1_32 <"s_not_b32",
- [(set i32:$sdst, (not i32:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
>;
def S_NOT_B64 : SOP1_64 <"s_not_b64",
- [(set i64:$sdst, (not i64:$src0))]
+ [(set i64:$sdst, (UniformUnaryFrag<not> i64:$src0))]
>;
def S_WQM_B32 : SOP1_32 <"s_wqm_b32">;
def S_WQM_B64 : SOP1_64 <"s_wqm_b64">;
@@ -221,22 +257,22 @@ let isReMaterializable = 1 in {
def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">;
def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">;
def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64",
- [(set i32:$sdst, (AMDGPUffbl_b32 i64:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<AMDGPUffbl_b32> i64:$src0))]
>;
def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32",
- [(set i32:$sdst, (AMDGPUffbl_b32 i32:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<AMDGPUffbl_b32> i32:$src0))]
>;
def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32",
- [(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<AMDGPUffbh_u32> i32:$src0))]
>;
def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64",
- [(set i32:$sdst, (AMDGPUffbh_u32 i64:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<AMDGPUffbh_u32> i64:$src0))]
>;
def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32",
- [(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<AMDGPUffbh_i32> i32:$src0))]
>;
def S_FLBIT_I32_I64 : SOP1_32_64 <"s_flbit_i32_i64">;
def S_SEXT_I32_I8 : SOP1_32 <"s_sext_i32_i8",
@@ -426,41 +462,6 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
"$sdst, $src0, $src1", pattern
>;
-class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
- (ops node:$src0),
- (Op $src0),
- [{ return !N->isDivergent(); }]> {
- // This check is unnecessary as it's captured by the result register
- // bank constraint.
- //
- // FIXME: Should add a way for the emitter to recognize this is a
- // trivially true predicate to eliminate the check.
- let GISelPredicateCode = [{return true;}];
-}
-
-class UniformBinFrag<SDPatternOperator Op> : PatFrag <
- (ops node:$src0, node:$src1),
- (Op $src0, $src1),
- [{ return !N->isDivergent(); }]> {
- // This check is unnecessary as it's captured by the result register
- // bank constraint.
- //
- // FIXME: Should add a way for the emitter to recognize this is a
- // trivially true predicate to eliminate the check.
- let GISelPredicateCode = [{return true;}];
-}
-
-class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
- (ops node:$src0, node:$src1),
- (Op $src0, $src1),
- [{ return N->isDivergent(); }]> {
- // This check is unnecessary as it's captured by the result register
- // bank constraint.
- //
- // FIXME: Should add a way for the emitter to recognize this is a
- // trivially true predicate to eliminate the check.
- let GISelPredicateCode = [{return true;}];
-}
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
@@ -485,19 +486,18 @@ def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
[(set i32:$sdst, (UniformBinFrag<sube> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End Uses = [SCC]
-
let isCommutable = 1 in {
def S_MIN_I32 : SOP2_32 <"s_min_i32",
- [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smin> i32:$src0, i32:$src1))]
>;
def S_MIN_U32 : SOP2_32 <"s_min_u32",
- [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umin> i32:$src0, i32:$src1))]
>;
def S_MAX_I32 : SOP2_32 <"s_max_i32",
- [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smax> i32:$src0, i32:$src1))]
>;
def S_MAX_U32 : SOP2_32 <"s_max_u32",
- [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umax> i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
} // End Defs = [SCC]
@@ -870,7 +870,7 @@ def S_GETREG_B32 : SOPK_Pseudo <
}
} // End mayLoad = 1
-let mayLoad = 0, mayStore = 0, Defs = [MODE], Uses = [MODE] in {
+let Defs = [MODE], Uses = [MODE] in {
// FIXME: Need to truncate immediate to 16-bits.
class S_SETREG_B32_Pseudo <list<dag> pattern=[]> : SOPK_Pseudo <
@@ -914,7 +914,7 @@ def S_SETREG_IMM32_B32_mode : S_SETREG_IMM32_B32_Pseudo {
let hasSideEffects = 0;
}
-} // End mayLoad = 0, mayStore = 0, Defs = [MODE], Uses = [MODE]
+} // End Defs = [MODE], Uses = [MODE]
class SOPK_WAITCNT<string opName, list<dag> pat=[]> :
SOPK_Pseudo<
@@ -1264,7 +1264,7 @@ def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
let mayStore = 1;
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+let hasSideEffects = 1 in
def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins WAIT_FLAG:$simm16), "$simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i32imm:$simm16), "$simm16",
@@ -1278,8 +1278,6 @@ def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">;
def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
"$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 0;
- let mayStore = 0;
}
def S_SETPRIO : SOPP_Pseudo <"s_setprio" , (ins i16imm:$simm16), "$simm16">;
@@ -1305,14 +1303,10 @@ def S_ICACHE_INV : SOPP_Pseudo <"s_icache_inv", (ins)> {
def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 0;
- let mayStore = 0;
}
def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 0;
- let mayStore = 0;
}
def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> {
let simm16 = 0;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
index 2e4d83fbbc39..a83ff6667956 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -15,7 +15,6 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/ReplaceConstant.h"
@@ -25,175 +24,6 @@ namespace llvm {
namespace AMDGPU {
-// An helper class for collecting all reachable callees for each kernel defined
-// within the module.
-class CollectReachableCallees {
- Module &M;
- CallGraph CG;
- SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;
-
- // Collect all address taken functions within the module.
- void collectAddressTakenFunctions() {
- auto *ECNode = CG.getExternalCallingNode();
-
- for (auto GI = ECNode->begin(), GE = ECNode->end(); GI != GE; ++GI) {
- auto *CGN = GI->second;
- auto *F = CGN->getFunction();
- if (!F || F->isDeclaration() || AMDGPU::isKernelCC(F))
- continue;
- AddressTakenFunctions.insert(CGN);
- }
- }
-
- // For given kernel, collect all its reachable non-kernel functions.
- SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
- SmallPtrSet<Function *, 8> ReachableCallees;
-
- // Call graph node which represents this kernel.
- auto *KCGN = CG[K];
-
- // Go through all call graph nodes reachable from the node representing this
- // kernel, visit all their call sites, if the call site is direct, add
- // corresponding callee to reachable callee set, if it is indirect, resolve
- // the indirect call site to potential reachable callees, add them to
- // reachable callee set, and repeat the process for the newly added
- // potential callee nodes.
- //
- // FIXME: Need to handle bit-casted function pointers.
- //
- SmallVector<CallGraphNode *, 8> CGNStack(df_begin(KCGN), df_end(KCGN));
- SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
- while (!CGNStack.empty()) {
- auto *CGN = CGNStack.pop_back_val();
-
- if (!VisitedCGNodes.insert(CGN).second)
- continue;
-
- // Ignore call graph node which does not have associated function or
- // associated function is not a definition.
- if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
- continue;
-
- for (auto GI = CGN->begin(), GE = CGN->end(); GI != GE; ++GI) {
- auto *RCB = cast<CallBase>(GI->first.getValue());
- auto *RCGN = GI->second;
-
- if (auto *DCallee = RCGN->getFunction()) {
- ReachableCallees.insert(DCallee);
- } else if (RCB->isIndirectCall()) {
- auto *RCBFTy = RCB->getFunctionType();
- for (auto *ACGN : AddressTakenFunctions) {
- auto *ACallee = ACGN->getFunction();
- if (ACallee->getFunctionType() == RCBFTy) {
- ReachableCallees.insert(ACallee);
- CGNStack.append(df_begin(ACGN), df_end(ACGN));
- }
- }
- }
- }
- }
-
- return ReachableCallees;
- }
-
-public:
- explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
- // Collect address taken functions.
- collectAddressTakenFunctions();
- }
-
- void collectReachableCallees(
- DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
- // Collect reachable callee set for each kernel defined in the module.
- for (Function &F : M.functions()) {
- if (!AMDGPU::isKernelCC(&F))
- continue;
- Function *K = &F;
- KernelToCallees[K] = collectReachableCallees(K);
- }
- }
-};
-
-void collectReachableCallees(
- Module &M,
- DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
- CollectReachableCallees CRC{M};
- CRC.collectReachableCallees(KernelToCallees);
-}
-
-SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) {
- SmallPtrSet<Function *, 8> LDSAccessors;
- SmallVector<User *, 8> UserStack(GV->users());
- SmallPtrSet<User *, 8> VisitedUsers;
-
- while (!UserStack.empty()) {
- auto *U = UserStack.pop_back_val();
-
- // `U` is already visited? continue to next one.
- if (!VisitedUsers.insert(U).second)
- continue;
-
- // `U` is a global variable which is initialized with LDS. Ignore LDS.
- if (isa<GlobalValue>(U))
- return SmallPtrSet<Function *, 8>();
-
- // Recursively explore constant users.
- if (isa<Constant>(U)) {
- append_range(UserStack, U->users());
- continue;
- }
-
- // `U` should be an instruction, if it belongs to a non-kernel function F,
- // then collect F.
- Function *F = cast<Instruction>(U)->getFunction();
- if (!AMDGPU::isKernelCC(F))
- LDSAccessors.insert(F);
- }
-
- return LDSAccessors;
-}
-
-DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
-getFunctionToInstsMap(User *U, bool CollectKernelInsts) {
- DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts;
- SmallVector<User *, 8> UserStack;
- SmallPtrSet<User *, 8> VisitedUsers;
-
- UserStack.push_back(U);
-
- while (!UserStack.empty()) {
- auto *UU = UserStack.pop_back_val();
-
- if (!VisitedUsers.insert(UU).second)
- continue;
-
- if (isa<GlobalValue>(UU))
- continue;
-
- if (isa<Constant>(UU)) {
- append_range(UserStack, UU->users());
- continue;
- }
-
- auto *I = cast<Instruction>(UU);
- Function *F = I->getFunction();
- if (CollectKernelInsts) {
- if (!AMDGPU::isKernelCC(F)) {
- continue;
- }
- } else {
- if (AMDGPU::isKernelCC(F)) {
- continue;
- }
- }
-
- FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>()));
- FunctionToInsts[F].insert(I);
- }
-
- return FunctionToInsts;
-}
-
bool isKernelCC(const Function *Func) {
return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
}
@@ -232,26 +62,8 @@ void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
}
}
-bool hasUserInstruction(const GlobalValue *GV) {
- SmallPtrSet<const User *, 8> Visited;
- SmallVector<const User *, 16> Stack(GV->users());
-
- while (!Stack.empty()) {
- const User *U = Stack.pop_back_val();
-
- if (!Visited.insert(U).second)
- continue;
-
- if (isa<Instruction>(U))
- return true;
-
- append_range(Stack, U->users());
- }
-
- return false;
-}
-
-bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) {
+static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
+ const Function *F) {
// We are not interested in kernel LDS lowering for module LDS itself.
if (F && GV.getName() == "llvm.amdgcn.module.lds")
return false;
@@ -259,7 +71,6 @@ bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) {
bool Ret = false;
SmallPtrSet<const User *, 8> Visited;
SmallVector<const User *, 16> Stack(GV.users());
- SmallPtrSet<const GlobalValue *, 8> GlobalUsers;
assert(!F || isKernelCC(F));
@@ -267,15 +78,10 @@ bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) {
const User *V = Stack.pop_back_val();
Visited.insert(V);
- if (auto *G = dyn_cast<GlobalValue>(V)) {
- StringRef GName = G->getName();
- if (F && GName != "llvm.used" && GName != "llvm.compiler.used") {
- // For kernel LDS lowering, if G is not a compiler.used list, then we
- // cannot lower the lds GV since we cannot replace the use of GV within
- // G.
- return false;
- }
- GlobalUsers.insert(G);
+ if (isa<GlobalValue>(V)) {
+ // This use of the LDS variable is the initializer of a global variable.
+ // This is ill formed. The address of an LDS variable is kernel dependent
+ // and unknown until runtime. It can't be written to a global variable.
continue;
}
@@ -297,15 +103,6 @@ bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) {
append_range(Stack, V->users());
}
- if (!F && !Ret) {
- // For module LDS lowering, we have not yet decided if we should lower GV or
- // not. Explore all global users of GV, and check if atleast one of these
- // global users appear as an use within an instruction (possibly nested use
- // via constant expression), if so, then conservately lower LDS.
- for (auto *G : GlobalUsers)
- Ret |= hasUserInstruction(G);
- }
-
return Ret;
}
@@ -324,7 +121,7 @@ std::vector<GlobalVariable *> findVariablesToLower(Module &M,
continue;
}
if (!isa<UndefValue>(GV.getInitializer())) {
- // Initializers are unimplemented for local address space.
+ // Initializers are unimplemented for LDS address space.
// Leave such variables in place for consistent error reporting.
continue;
}
@@ -342,20 +139,6 @@ std::vector<GlobalVariable *> findVariablesToLower(Module &M,
return LocalVars;
}
-SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
- SmallPtrSet<GlobalValue *, 32> UsedList;
-
- SmallVector<GlobalValue *, 32> TmpVec;
- collectUsedGlobalVariables(M, TmpVec, true);
- UsedList.insert(TmpVec.begin(), TmpVec.end());
-
- TmpVec.clear();
- collectUsedGlobalVariables(M, TmpVec, false);
- UsedList.insert(TmpVec.begin(), TmpVec.end());
-
- return UsedList;
-}
-
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
index d1c9229bc336..83ef68cc3f60 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -22,44 +22,13 @@ class ConstantExpr;
namespace AMDGPU {
-/// Collect reachable callees for each kernel defined in the module \p M and
-/// return collected callees at \p KernelToCallees.
-void collectReachableCallees(
- Module &M,
- DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees);
-
-/// For the given LDS global \p GV, visit all its users and collect all
-/// non-kernel functions within which \p GV is used and return collected list of
-/// such non-kernel functions.
-SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);
-
-/// Collect all the instructions where user \p U belongs to. \p U could be
-/// instruction itself or it could be a constant expression which is used within
-/// an instruction. If \p CollectKernelInsts is true, collect instructions only
-/// from kernels, otherwise collect instructions only from non-kernel functions.
-DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
-getFunctionToInstsMap(User *U, bool CollectKernelInsts);
-
bool isKernelCC(const Function *Func);
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
-/// \returns true if a given global variable \p GV (or its global users) appear
-/// as an use within some instruction (either from kernel or from non-kernel).
-bool hasUserInstruction(const GlobalValue *GV);
-
-/// \returns true if an LDS global requires lowering to a module LDS structure
-/// if \p F is not given. If \p F is given it must be a kernel and function
-/// \returns true if an LDS global is directly used from that kernel and it
-/// is safe to replace its uses with a kernel LDS structure member.
-bool shouldLowerLDSToStruct(const GlobalVariable &GV,
- const Function *F = nullptr);
-
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
const Function *F = nullptr);
-SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M);
-
/// Replace all uses of constant \p C with instructions in \p F.
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index a3eccf13cd71..a8368892c565 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -794,6 +794,18 @@ class VOPPatGen<SDPatternOperator Op, VOPProfile P> {
list<dag> ret = [!con(Outs, (set Ins))];
}
+class DivergentUnaryFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0),
+ (Op $src0),
+ [{ return N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
+
class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
}
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index f4d0f4a6d6b0..d0efecad63bc 100644
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -592,16 +592,15 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
bool Modified = false;
- for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
- I != E; ++I) {
+ for (unsigned I : Defs) {
// Follow the def-use chain for this DPR through COPYs, and also through
// PHIs (which are essentially multi-way COPYs). It is because of PHIs that
// we can end up with multiple defs of this DPR.
SmallVector<MachineInstr *, 8> DefSrcs;
- if (!Register::isVirtualRegister(*I))
+ if (!Register::isVirtualRegister(I))
continue;
- MachineInstr *Def = MRI->getVRegDef(*I);
+ MachineInstr *Def = MRI->getVRegDef(I);
if (!Def)
continue;
@@ -628,18 +627,17 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
if (NewReg != 0) {
Modified = true;
- for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
+ for (MachineOperand *Use : Uses) {
// Make sure to constrain the register class of the new register to
// match what we're replacing. Otherwise we can optimize a DPR_VFP2
// reference into a plain DPR, and that will end poorly. NewReg is
// always virtual here, so there will always be a matching subclass
// to find.
- MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
+ MRI->constrainRegClass(NewReg, MRI->getRegClass(Use->getReg()));
- LLVM_DEBUG(dbgs() << "Replacing operand " << **I << " with "
+ LLVM_DEBUG(dbgs() << "Replacing operand " << *Use << " with "
<< printReg(NewReg) << "\n");
- (*I)->substVirtReg(NewReg, 0, *TRI);
+ Use->substVirtReg(NewReg, 0, *TRI);
}
}
Replacements[MI] = NewReg;
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index e03dd597eb65..8173fe4036a8 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -446,6 +446,11 @@ def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true",
"Enable Pointer Authentication and Branch "
"Target Identification">;
+def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
+ "NoBTIAtReturnTwice", "true",
+ "Don't place a BTI instruction "
+ "after a return-twice">;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 6a88ac485e69..fa09b2567aa9 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1153,8 +1153,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
unsigned StartOp = 2 + 2;
// Use all the operands.
unsigned NumOffset = 0;
- // Amount of SP adjustment folded into a push.
- unsigned Pad = 0;
+ // Amount of SP adjustment folded into a push, before the
+ // registers are stored (pad at higher addresses).
+ unsigned PadBefore = 0;
+ // Amount of SP adjustment folded into a push, after the
+ // registers are stored (pad at lower addresses).
+ unsigned PadAfter = 0;
switch (Opc) {
default:
@@ -1185,7 +1189,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
"Pad registers must come before restored ones");
unsigned Width =
TargetRegInfo->getRegSizeInBits(MO.getReg(), MachineRegInfo) / 8;
- Pad += Width;
+ PadAfter += Width;
continue;
}
// Check for registers that are remapped (for a Thumb1 prologue that
@@ -1201,14 +1205,32 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::t2STR_PRE:
assert(MI->getOperand(2).getReg() == ARM::SP &&
"Only stack pointer as a source reg is supported");
+ if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg))
+ SrcReg = RemappedReg;
+
+ RegList.push_back(SrcReg);
+ break;
+ case ARM::t2STRD_PRE:
+ assert(MI->getOperand(3).getReg() == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ SrcReg = MI->getOperand(1).getReg();
+ if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg))
+ SrcReg = RemappedReg;
+ RegList.push_back(SrcReg);
+ SrcReg = MI->getOperand(2).getReg();
+ if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg))
+ SrcReg = RemappedReg;
RegList.push_back(SrcReg);
+ PadBefore = -MI->getOperand(4).getImm() - 8;
break;
}
if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) {
+ if (PadBefore)
+ ATS.emitPad(PadBefore);
ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
// Account for the SP adjustment, folded into the push.
- if (Pad)
- ATS.emitPad(Pad);
+ if (PadAfter)
+ ATS.emitPad(PadAfter);
}
} else {
// Changes of stack / frame pointer.
@@ -1300,6 +1322,10 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
Offset = MI->getOperand(2).getImm();
AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16);
break;
+ case ARM::t2PAC:
+ case ARM::t2PACBTI:
+ AFI->EHPrologueRemappedRegs[ARM::R12] = ARM::RA_AUTH_CODE;
+ break;
default:
MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2a12947d24a8..884f38ff6c58 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2629,8 +2629,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// Add the complete list back in.
MachineInstrBuilder MIB(MF, &*MI);
- for (int i = RegList.size() - 1; i >= 0; --i)
- MIB.add(RegList[i]);
+ for (const MachineOperand &MO : llvm::reverse(RegList))
+ MIB.add(MO);
return true;
}
@@ -5678,7 +5678,7 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
/// | | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes | 4 | 4 |
-/// | Frame overhead in Bytes | 4 | 4 |
+/// | Frame overhead in Bytes | 2 | 4 |
/// | Stack fixup required | No | No |
/// +-------------------------+--------+-----+
///
@@ -5755,7 +5755,7 @@ struct OutlinerCosts {
CallThunk(target.isThumb() ? 4 : 4),
FrameThunk(target.isThumb() ? 0 : 0),
CallNoLRSave(target.isThumb() ? 4 : 4),
- FrameNoLRSave(target.isThumb() ? 4 : 4),
+ FrameNoLRSave(target.isThumb() ? 2 : 4),
CallRegSave(target.isThumb() ? 8 : 12),
FrameRegSave(target.isThumb() ? 2 : 4),
CallDefault(target.isThumb() ? 8 : 12),
@@ -5868,11 +5868,17 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
return outliner::OutlinedFunction();
}
+ // We expect the majority of the outlining candidates to be in consensus with
+ // regard to return address sign and authentication, and branch target
+ // enforcement, in other words, partitioning according to all the four
+ // possible combinations of PAC-RET and BTI is going to yield one big subset
+ // and three small (likely empty) subsets. That allows us to cull incompatible
+ // candidates separately for PAC-RET and BTI.
+
// Partition the candidates in two sets: one with BTI enabled and one with BTI
- // disabled. Remove the candidates from the smaller set. We expect the
- // majority of the candidates to be in consensus with regard to branch target
- // enforcement with just a few oddballs, but if they are the same number
- // prefer the non-BTI ones for outlining, since they have less overhead.
+ // disabled. Remove the candidates from the smaller set. If they are the same
+ // number prefer the non-BTI ones for outlining, since they have less
+ // overhead.
auto NoBTI =
llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
@@ -5883,6 +5889,24 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
else
RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
+
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+
+ // Likewise, partition the candidates according to PAC-RET enablement.
+ auto NoPAC =
+ llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
+ const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
+ // If the function happens to not spill the LR, do not disqualify it
+ // from the outlining.
+ return AFI.shouldSignReturnAddress(true);
+ });
+ if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
+ std::distance(NoPAC, RepeatedSequenceLocs.end()))
+ RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
+ else
+ RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
+
if (RepeatedSequenceLocs.size() < 2)
return outliner::OutlinedFunction();
@@ -5899,6 +5923,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
};
OutlinerCosts Costs(Subtarget);
+
const auto &SomeMFI =
*RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
// Adjust costs to account for the BTI instructions.
@@ -5909,6 +5934,13 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
Costs.FrameTailCall += 4;
Costs.FrameThunk += 4;
}
+
+ // Adjust costs to account for sign and authentication instructions.
+ if (SomeMFI.shouldSignReturnAddress(true)) {
+ Costs.CallDefault += 8; // +PAC instr, +AUT instr
+ Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
+ }
+
unsigned FrameID = MachineOutlinerDefault;
unsigned NumBytesToCreateFrame = Costs.FrameDefault;
@@ -6325,6 +6357,11 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// * LR is available in the range (No save/restore around call)
// * The range doesn't include calls (No save/restore in outlined frame)
// are true.
+ // These conditions also ensure correctness of the return address
+ // authentication - we insert sign and authentication instructions only if
+ // we save/restore LR on stack, but then this condition ensures that the
+ // outlined range does not modify the SP, therefore the SP value used for
+ // signing is the same as the one used for authentication.
// FIXME: This is very restrictive; the flags check the whole block,
// not just the bit we will try to outline.
bool MightNeedStackFixUp =
@@ -6369,23 +6406,39 @@ void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
}
void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const {
- unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
- int Align = -Subtarget.getStackAlignment().value();
- BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
- .addReg(ARM::LR, RegState::Kill)
- .addReg(ARM::SP)
- .addImm(Align)
- .add(predOps(ARMCC::AL));
-}
+ MachineBasicBlock::iterator It, bool CFI,
+ bool Auth) const {
+ int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
+ assert(Align >= 8 && Align <= 256);
+ if (Auth) {
+ assert(Subtarget.isThumb2());
+ // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
+ // sequence.
+ BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC))
+ .setMIFlags(MachineInstr::FrameSetup);
+ BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
+ .addReg(ARM::R12, RegState::Kill)
+ .addReg(ARM::LR, RegState::Kill)
+ .addReg(ARM::SP)
+ .addImm(-Align)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
+ BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
+ .addReg(ARM::LR, RegState::Kill)
+ .addReg(ARM::SP)
+ .addImm(-Align)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ if (!CFI)
+ return;
-void ARMBaseInstrInfo::emitCFIForLRSaveOnStack(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
MachineFunction &MF = *MBB.getParent();
- const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
- unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
- int Align = Subtarget.getStackAlignment().value();
- // Add a CFI saying the stack was moved down.
+
+ // Add a CFI, saying CFA is offset by Align bytes from SP.
int64_t StackPosEntry =
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
@@ -6394,11 +6447,23 @@ void ARMBaseInstrInfo::emitCFIForLRSaveOnStack(
// Add a CFI saying that the LR that we want to find is now higher than
// before.
- int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align));
+ int LROffset = Auth ? Align - 4 : Align;
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ int64_t LRPosEntry = MF.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
.addCFIIndex(LRPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
+ if (Auth) {
+ // Add a CFI for the location of the return adddress PAC.
+ unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
+ int64_t RACPosEntry = MF.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(RACPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
}
void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
@@ -6416,35 +6481,64 @@ void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
.setMIFlags(MachineInstr::FrameSetup);
}
-void ARMBaseInstrInfo::restoreLRFromStack(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
- unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP);
- if (!Subtarget.isThumb())
- MIB.addReg(0);
- MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL));
-}
+void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It,
+ bool CFI, bool Auth) const {
+ int Align = Subtarget.getStackAlignment().value();
+ if (Auth) {
+ assert(Subtarget.isThumb2());
+ // Restore return address PAC and LR.
+ BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .addImm(Align)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MachineInstr::FrameDestroy);
+ // LR authentication is after the CFI instructions, below.
+ } else {
+ unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ if (!Subtarget.isThumb())
+ MIB.addReg(0);
+ MIB.addImm(Subtarget.getStackAlignment().value())
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MachineInstr::FrameDestroy);
+ }
-void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
- // Now stack has moved back up...
- MachineFunction &MF = *MBB.getParent();
- const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
- unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
- int64_t StackPosEntry =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(StackPosEntry)
- .setMIFlags(MachineInstr::FrameDestroy);
+ if (CFI) {
+ // Now stack has moved back up...
+ MachineFunction &MF = *MBB.getParent();
+ const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
+ unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
+ int64_t StackPosEntry =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(StackPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
- // ... and we have restored LR.
- int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
- BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
- .addCFIIndex(LRPosEntry)
- .setMIFlags(MachineInstr::FrameDestroy);
+ // ... and we have restored LR.
+ int64_t LRPosEntry =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+
+ if (Auth) {
+ unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
+ int64_t Entry =
+ MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
+ BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
+ .addCFIIndex(Entry)
+ .setMIFlags(MachineInstr::FrameDestroy);
+ }
+ }
+
+ if (Auth)
+ BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
}
void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
@@ -6500,8 +6594,11 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
MBB.addLiveIn(ARM::LR);
// Insert a save before the outlined region
- saveLROnStack(MBB, It);
- emitCFIForLRSaveOnStack(MBB, It);
+ bool Auth = OF.Candidates.front()
+ .getMF()
+ ->getInfo<ARMFunctionInfo>()
+ ->shouldSignReturnAddress(true);
+ saveLROnStack(MBB, It, true, Auth);
// Fix up the instructions in the range, since we're going to modify the
// stack.
@@ -6510,8 +6607,7 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
fixupPostOutline(MBB);
// Insert a restore before the terminator for the function. Restore LR.
- restoreLRFromStack(MBB, Et);
- emitCFIForLRRestoreFromStack(MBB, Et);
+ restoreLRFromStack(MBB, Et, true, Auth);
}
// If this is a tail call outlined function, then there's already a return.
@@ -6590,13 +6686,10 @@ MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
// We have the default case. Save and restore from SP.
if (!MBB.isLiveIn(ARM::LR))
MBB.addLiveIn(ARM::LR);
- saveLROnStack(MBB, It);
- if (!AFI.isLRSpilled())
- emitCFIForLRSaveOnStack(MBB, It);
+ bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
+ saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
CallPt = MBB.insert(It, CallMIB);
- restoreLRFromStack(MBB, It);
- if (!AFI.isLRSpilled())
- emitCFIForLRRestoreFromStack(MBB, It);
+ restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
It--;
return CallPt;
}
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 5fa912ae35d7..defce07dd862 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -377,20 +377,20 @@ private:
/// constructing an outlined call if one exists. Returns 0 otherwise.
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
- // Adds an instruction which saves the link register on top of the stack into
- /// the MachineBasicBlock \p MBB at position \p It.
- void saveLROnStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
+ /// Adds an instruction which saves the link register on top of the stack into
+ /// the MachineBasicBlock \p MBB at position \p It. If \p Auth is true,
+ /// compute and store an authentication code alongiside the link register.
+ /// If \p CFI is true, emit CFI instructions.
+ void saveLROnStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator It,
+ bool CFI, bool Auth) const;
/// Adds an instruction which restores the link register from the top the
- /// stack into the MachineBasicBlock \p MBB at position \p It.
+ /// stack into the MachineBasicBlock \p MBB at position \p It. If \p Auth is
+ /// true, restore an authentication code and authenticate LR.
+ /// If \p CFI is true, emit CFI instructions.
void restoreLRFromStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
-
- /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
- /// for the case when the LR is saved on the stack.
- void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
+ MachineBasicBlock::iterator It, bool CFI,
+ bool Auth) const;
/// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
/// for the case when the LR is saved in the register \p Reg.
@@ -399,11 +399,6 @@ private:
Register Reg) const;
/// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
- /// after the LR is was restored from the stack.
- void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator It) const;
-
- /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It,
/// after the LR is was restored from a register.
void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator It) const;
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b53efe58e8de..c543d02ff75a 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -530,6 +530,8 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
unsigned ImmIdx = 0;
switch (AddrMode) {
case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i8neg:
+ case ARMII::AddrModeT2_i8pos:
case ARMII::AddrModeT2_i12:
case ARMII::AddrMode_i12:
InstrOffs = MI->getOperand(Idx+1).getImm();
@@ -728,6 +730,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
bool isSigned = true;
switch (AddrMode) {
case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i8pos:
+ case ARMII::AddrModeT2_i8neg:
case ARMII::AddrModeT2_i12:
// i8 supports only negative, and i12 supports only positive, so
// based on Offset sign, consider the appropriate instruction
diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
index 1091c1f970fa..8ba3e627c039 100644
--- a/llvm/lib/Target/ARM/ARMBranchTargets.cpp
+++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
@@ -108,6 +108,7 @@ void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB,
bool IsFirstBB) {
// Which instruction to insert: BTI or PACBTI
unsigned OpCode = ARM::t2BTI;
+ unsigned MIFlags = 0;
// Skip meta instructions, including EH labels
auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) {
@@ -121,6 +122,7 @@ void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB,
LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName()
<< "' to replace with PACBTI\n");
OpCode = ARM::t2PACBTI;
+ MIFlags = MachineInstr::FrameSetup;
auto NextMBBI = std::next(MBBI);
MBBI->eraseFromParent();
MBBI = NextMBBI;
@@ -131,5 +133,6 @@ void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB,
<< (OpCode == ARM::t2BTI ? "BTI" : "PACBTI")
<< "' instr into BB '" << MBB.getName() << "'\n");
// Finally, insert a new instruction (either PAC or PACBTI)
- BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode));
+ BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode))
+ .setMIFlags(MIFlags);
}
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp
index d8d9ca3b912f..32f3a4a632f5 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -230,10 +230,9 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
if (RegResult) {
- for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
- It != PendingMembers.end(); ++It) {
- It->convertToReg(RegResult);
- State.addLoc(*It);
+ for (CCValAssign &PendingMember : PendingMembers) {
+ PendingMember.convertToReg(RegResult);
+ State.addLoc(PendingMember);
++RegResult;
}
PendingMembers.clear();
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index c2ca4708c208..a2a4f1f3bdfd 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -310,8 +310,7 @@ void ARMConstantIslands::verify() {
BBInfo[RHS.getNumber()].postOffset();
}));
LLVM_DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
- for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
- CPUser &U = CPUsers[i];
+ for (CPUser &U : CPUsers) {
unsigned UserOffset = getUserOffset(U);
// Verify offset using the real max displacement without the safety
// adjustment.
@@ -697,10 +696,9 @@ ARMConstantIslands::findConstPoolEntry(unsigned CPI,
std::vector<CPEntry> &CPEs = CPEntries[CPI];
// Number of entries per constpool index should be small, just do a
// linear search.
- for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
- if (CPEs[i].CPEMI == CPEMI)
- return &CPEs[i];
- }
+ for (CPEntry &CPE : CPEs)
+ if (CPE.CPEMI == CPEMI)
+ return &CPE;
return nullptr;
}
@@ -1234,27 +1232,27 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) {
// No. Look for previously created clones of the CPE that are in range.
unsigned CPI = getCombinedIndex(CPEMI);
std::vector<CPEntry> &CPEs = CPEntries[CPI];
- for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ for (CPEntry &CPE : CPEs) {
// We already tried this one
- if (CPEs[i].CPEMI == CPEMI)
+ if (CPE.CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries, skip
- if (CPEs[i].CPEMI == nullptr)
+ if (CPE.CPEMI == nullptr)
continue;
- if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
- U.NegOk)) {
- LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
- << CPEs[i].CPI << "\n");
+ if (isCPEntryInRange(UserMI, UserOffset, CPE.CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPE.CPI
+ << "\n");
// Point the CPUser node to the replacement
- U.CPEMI = CPEs[i].CPEMI;
+ U.CPEMI = CPE.CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
for (MachineOperand &MO : UserMI->operands())
if (MO.isCPI()) {
- MO.setIndex(CPEs[i].CPI);
+ MO.setIndex(CPE.CPI);
break;
}
// Adjust the refcount of the clone...
- CPEs[i].RefCount++;
+ CPE.RefCount++;
// ...and the original. If we didn't remove the old entry, none of the
// addresses changed, so we don't need another pass.
return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
@@ -1675,15 +1673,14 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
/// are zero.
bool ARMConstantIslands::removeUnusedCPEntries() {
unsigned MadeChange = false;
- for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
- std::vector<CPEntry> &CPEs = CPEntries[i];
- for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
- if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
- removeDeadCPEMI(CPEs[j].CPEMI);
- CPEs[j].CPEMI = nullptr;
- MadeChange = true;
- }
+ for (std::vector<CPEntry> &CPEs : CPEntries) {
+ for (CPEntry &CPE : CPEs) {
+ if (CPE.RefCount == 0 && CPE.CPEMI) {
+ removeDeadCPEMI(CPE.CPEMI);
+ CPE.CPEMI = nullptr;
+ MadeChange = true;
}
+ }
}
return MadeChange;
}
@@ -1829,8 +1826,7 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
- for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
- CPUser &U = CPUsers[i];
+ for (CPUser &U : CPUsers) {
unsigned Opcode = U.MI->getOpcode();
unsigned NewOpc = 0;
unsigned Scale = 1;
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 7a35f252b22a..fa244786a80d 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -2160,6 +2160,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
case ARM::tBXNS_RET: {
+ // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which
+ // uses R12 as a scratch register.
+ if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress())
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT));
+
MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
if (STI->hasV8_1MMainlineOps()) {
@@ -2169,6 +2174,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addReg(ARM::SP)
.addImm(4)
.add(predOps(ARMCC::AL));
+
+ if (AFI->shouldSignReturnAddress())
+ BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT));
}
// Clear all GPR that are not a use of the return instruction.
@@ -3073,6 +3081,22 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case ARM::t2CALL_BTI: {
+ MachineFunction &MF = *MI.getMF();
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::tBL));
+ MIB.cloneMemRefs(MI);
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i)
+ MIB.add(MI.getOperand(i));
+ if (MI.isCandidateForCallSiteEntry())
+ MF.moveCallSiteInfo(&MI, MIB.getInstr());
+ MIBundleBuilder Bundler(MBB, MI);
+ Bundler.append(MIB);
+ Bundler.append(BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::t2BTI)));
+ finalizeBundle(MBB, Bundler.begin(), Bundler.end());
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::LOADDUAL:
case ARM::STOREDUAL: {
Register PairReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index b866cf952ff1..4b59f9cb94ce 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -503,20 +503,12 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
StackAdjustingInsts DefCFAOffsetCandidates;
bool HasFP = hasFP(MF);
- // Allocate the vararg register save area.
- if (ArgRegsSaveSize) {
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
- MachineInstr::FrameSetup);
- DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
- }
-
if (!AFI->hasStackFrame() &&
(!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
- if (NumBytes - ArgRegsSaveSize != 0) {
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
+ if (NumBytes != 0) {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
MachineInstr::FrameSetup);
- DefCFAOffsetCandidates.addInst(std::prev(MBBI),
- NumBytes - ArgRegsSaveSize, true);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
}
DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
return;
@@ -562,13 +554,26 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // Move past FPCXT area.
MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
+
+ // Move past the PAC computation.
+ if (AFI->shouldSignReturnAddress())
+ LastPush = MBBI++;
+
+ // Move past FPCXT area.
if (FPCXTSaveSize > 0) {
LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
}
+ // Allocate the vararg register save area.
+ if (ArgRegsSaveSize) {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
+ MachineInstr::FrameSetup);
+ LastPush = std::prev(MBBI);
+ DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
+ }
+
// Move past area 1.
if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
@@ -788,7 +793,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R11:
case ARM::R12:
if (STI.splitFramePushPop(MF)) {
- unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+ unsigned DwarfReg = MRI->getDwarfRegNum(
+ Reg == ARM::R12 ? (unsigned)ARM::RA_AUTH_CODE : Reg, true);
unsigned Offset = MFI.getObjectOffset(FI);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
@@ -923,8 +929,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
if (!AFI->hasStackFrame()) {
- if (NumBytes - ReservedArgStack != 0)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ReservedArgStack,
+ if (NumBytes + IncomingArgStackToRestore != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII,
+ NumBytes + IncomingArgStackToRestore,
MachineInstr::FrameDestroy);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
@@ -1007,15 +1014,21 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
- if (AFI->getFPCXTSaveAreaSize()) MBBI++;
- }
- if (ReservedArgStack || IncomingArgStackToRestore) {
- assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
- "attempting to restore negative stack amount");
- emitSPUpdate(isARM, MBB, MBBI, dl, TII,
- ReservedArgStack + IncomingArgStackToRestore,
- MachineInstr::FrameDestroy);
+ if (ReservedArgStack || IncomingArgStackToRestore) {
+ assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
+ "attempting to restore negative stack amount");
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII,
+ ReservedArgStack + IncomingArgStackToRestore,
+ MachineInstr::FrameDestroy);
+ }
+
+ // Validate PAC. It should have been already popped into R12. For CMSE entry
+ // function, the validation instruction is emitted during expansion of the
+ // tBXNS_RET, since the validation must use the value of SP at function
+ // entry, before saving, resp. after restoring, FPCXTNS.
+ if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
+ BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
}
}
@@ -1199,6 +1212,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool hasPAC = AFI->shouldSignReturnAddress();
DebugLoc DL;
bool isTailCall = false;
bool isInterrupt = false;
@@ -1231,7 +1245,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
- STI.hasV5TOps() && MBB.succ_empty()) {
+ STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
@@ -1580,6 +1594,11 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ // Compute PAC in R12.
+ if (AFI->shouldSignReturnAddress()) {
+ BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
// Save the non-secure floating point context.
if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
return C.getReg() == ARM::FPCXTNS;
@@ -1789,6 +1808,13 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
return false;
+ // We are disabling shrinkwrapping for now when PAC is enabled, as
+ // shrinkwrapping can cause clobbering of r12 when the PAC code is
+ // generated. A follow-up patch will fix this in a more performant manner.
+ if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
+ false /*SpillsLR */))
+ return false;
+
return true;
}
@@ -2315,6 +2341,26 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
CSI.back().setRestored(false);
}
+ // For functions, which sign their return address, upon function entry, the
+ // return address PAC is computed in R12. Treat R12 as a callee-saved register
+ // in this case.
+ const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
+ if (AFI.shouldSignReturnAddress()) {
+ // The order of register must match the order we push them, because the
+ // PEI assigns frame indices in that order. When compiling for return
+ // address sign and authentication, we use split push, therefore the orders
+ // we want are:
+ // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
+ CSI.insert(find_if(CSI,
+ [=](const auto &CS) {
+ unsigned Reg = CS.getReg();
+ return Reg == ARM::R10 || Reg == ARM::R11 ||
+ Reg == ARM::R8 || Reg == ARM::R9 ||
+ ARM::DPRRegClass.contains(Reg);
+ }),
+ CalleeSavedInfo(ARM::R12));
+ }
+
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 33d115945614..3d45db349644 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -391,6 +391,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Legal);
}
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
@@ -428,7 +429,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
}
// Predicate types
- const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
+ const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
for (auto VT : pTypes) {
addRegisterClass(VT, &ARM::VCCRRegClass);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -445,6 +446,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
}
+ setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
+ setOperationAction(ISD::AND, MVT::v2i1, Expand);
+ setOperationAction(ISD::OR, MVT::v2i1, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i1, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
+
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
@@ -1647,6 +1658,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::CALL_PRED)
MAKE_CASE(ARMISD::CALL_NOLINK)
MAKE_CASE(ARMISD::tSECALL)
+ MAKE_CASE(ARMISD::t2CALL_BTI)
MAKE_CASE(ARMISD::BRCOND)
MAKE_CASE(ARMISD::BR_JT)
MAKE_CASE(ARMISD::BR2_JT)
@@ -1853,8 +1865,10 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
// MVE has a predicate register.
if ((Subtarget->hasMVEIntegerOps() &&
- (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) ||
- (Subtarget->hasMVEFloatOps() && (VT == MVT::v4f32 || VT == MVT::v8f16)))
+ (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ VT == MVT::v16i8)) ||
+ (Subtarget->hasMVEFloatOps() &&
+ (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
@@ -2308,6 +2322,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isCmseNSCall = false;
bool isSibCall = false;
bool PreferIndirect = false;
+ bool GuardWithBTI = false;
+
+ // Lower 'returns_twice' calls to a pseudo-instruction.
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget->getNoBTIAtReturnTwice())
+ GuardWithBTI = AFI->branchTargetEnforcement();
// Determine whether this is a non-secure function call.
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
@@ -2713,7 +2733,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
- if (isCmseNSCall)
+ if (GuardWithBTI)
+ CallOpc = ARMISD::t2CALL_BTI;
+ else if (isCmseNSCall)
CallOpc = ARMISD::tSECALL;
else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -2930,9 +2952,17 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Indirect tail calls cannot be optimized for Thumb1 if the args
// to the call take up r0-r3. The reason is that there are no legal registers
// left to hold the pointer to the function to be called.
- if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
- (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
- return false;
+ // Similarly, if the function uses return address sign and authentication,
+ // r12 is needed to hold the PAC and is not available to hold the callee
+ // address.
+ if (Outs.size() >= 4 &&
+ (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
+ if (Subtarget->isThumb1Only())
+ return false;
+ // Conservatively assume the function spills LR.
+ if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
+ return false;
+ }
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
@@ -7616,7 +7646,10 @@ static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VT.getVectorNumElements();
unsigned BoolMask;
unsigned BitsPerBool;
- if (NumElts == 4) {
+ if (NumElts == 2) {
+ BitsPerBool = 8;
+ BoolMask = 0xff;
+ } else if (NumElts == 4) {
BitsPerBool = 4;
BoolMask = 0xf;
} else if (NumElts == 8) {
@@ -7699,6 +7732,46 @@ static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(N, DL, MVT::i32));
}
+// Returns true if the operation N can be treated as qr instruction variant at
+// operand Op.
+static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
+ switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ return true;
+ case ISD::SUB:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ return N->getOperand(1).getNode() == Op;
+ case ISD::INTRINSIC_WO_CHAIN:
+ switch (N->getConstantOperandVal(0)) {
+ case Intrinsic::arm_mve_add_predicated:
+ case Intrinsic::arm_mve_mul_predicated:
+ case Intrinsic::arm_mve_qadd_predicated:
+ case Intrinsic::arm_mve_vhadd:
+ case Intrinsic::arm_mve_hadd_predicated:
+ case Intrinsic::arm_mve_vqdmulh:
+ case Intrinsic::arm_mve_qdmulh_predicated:
+ case Intrinsic::arm_mve_vqrdmulh:
+ case Intrinsic::arm_mve_qrdmulh_predicated:
+ case Intrinsic::arm_mve_vqdmull:
+ case Intrinsic::arm_mve_vqdmull_predicated:
+ return true;
+ case Intrinsic::arm_mve_sub_predicated:
+ case Intrinsic::arm_mve_qsub_predicated:
+ case Intrinsic::arm_mve_vhsub:
+ case Intrinsic::arm_mve_hsub_predicated:
+ return N->getOperand(2).getNode() == Op;
+ default:
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@@ -7720,6 +7793,20 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (SplatUndef.isAllOnes())
return DAG.getUNDEF(VT);
+ // If all the users of this constant splat are qr instruction variants,
+ // generate a vdup of the constant.
+ if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
+ (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
+ all_of(BVN->uses(),
+ [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
+ EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
+ : SplatBitSize == 16 ? MVT::v8i16
+ : MVT::v16i8;
+ SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
+ SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
+ }
+
if ((ST->hasNEON() && SplatBitSize <= 64) ||
(ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
// Check if an immediate VMOV works.
@@ -8313,9 +8400,8 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
SDLoc DL(Op);
SmallVector<SDValue, 8> VTBLMask;
- for (ArrayRef<int>::iterator
- I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
- VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
+ for (int I : ShuffleMask)
+ VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
if (V2.getNode()->isUndef())
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
@@ -8346,6 +8432,8 @@ static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
static EVT getVectorTyFromPredicateVector(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::v2i1:
+ return MVT::v2f64;
case MVT::v4i1:
return MVT::v4i32;
case MVT::v8i1:
@@ -8427,7 +8515,14 @@ static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
DAG.getUNDEF(NewVT), ShuffleMask);
// Now return the result of comparing the shuffled vector with zero,
- // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
+ // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
+ if (VT == MVT::v2i1) {
+ SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
+ SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
+ }
return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
}
@@ -8927,8 +9022,15 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
ConVec = ExtractInto(NewV1, ConVec, j);
ConVec = ExtractInto(NewV2, ConVec, j);
- // Now return the result of comparing the subvector with zero,
- // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ // Now return the result of comparing the subvector with zero, which will
+ // generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 we
+ // convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
+ if (VT == MVT::v2i1) {
+ SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, ConVec);
+ SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
+ }
return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
};
@@ -8993,6 +9095,22 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
+ if (NumElts == 2) {
+ EVT SubVT = MVT::v4i32;
+ SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
+ for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
+ DAG.getIntPtrConstant(i, dl));
+ SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
+ DAG.getConstant(j, dl, MVT::i32));
+ SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
+ DAG.getConstant(j + 1, dl, MVT::i32));
+ }
+ SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
+ }
+
EVT SubVT = MVT::getVectorVT(ElType, NumElts);
SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
@@ -9839,16 +9957,17 @@ void ARMTargetLowering::ExpandDIV_Windows(
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
EVT MemVT = LD->getMemoryVT();
- assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ MemVT == MVT::v16i1) &&
"Expected a predicate type!");
assert(MemVT == Op.getValueType());
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Expected a non-extending load");
assert(LD->isUnindexed() && "Expected a unindexed load");
- // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16bit
+ // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
// predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
- // need to make sure that 8/4 bits are actually loaded into the correct
+ // need to make sure that 8/4/2 bits are actually loaded into the correct
// place, which means loading the value and then shuffling the values into
// the bottom bits of the predicate.
// Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
@@ -9895,14 +10014,15 @@ void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
- assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ MemVT == MVT::v16i1) &&
"Expected a predicate type!");
assert(MemVT == ST->getValue().getValueType());
assert(!ST->isTruncatingStore() && "Expected a non-extending store");
assert(ST->isUnindexed() && "Expected a unindexed store");
- // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits
- // unset and a scalar store.
+ // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
+ // top bits unset and a scalar store.
SDLoc dl(Op);
SDValue Build = ST->getValue();
if (MemVT != MVT::v16i1) {
@@ -9953,7 +10073,7 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
{ST->getChain(), Lo, Hi, ST->getBasePtr()},
MemVT, ST->getMemOperand());
} else if (Subtarget->hasMVEIntegerOps() &&
- ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
MemVT == MVT::v16i1))) {
return LowerPredicateStore(Op, DAG);
}
@@ -10561,25 +10681,23 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
- for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
- ++BB) {
- if (!BB->isEHPad()) continue;
+ for (MachineBasicBlock &BB : *MF) {
+ if (!BB.isEHPad())
+ continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
// pad.
- for (MachineBasicBlock::iterator
- II = BB->begin(), IE = BB->end(); II != IE; ++II) {
- if (!II->isEHLabel()) continue;
+ for (MachineInstr &II : BB) {
+ if (!II.isEHLabel())
+ continue;
- MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+ MCSymbol *Sym = II.getOperand(0).getMCSymbol();
if (!MF->hasCallSiteLandingPad(Sym)) continue;
SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
- for (SmallVectorImpl<unsigned>::iterator
- CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
- CSI != CSE; ++CSI) {
- CallSiteNumToLPad[*CSI].push_back(&*BB);
- MaxCSNum = std::max(MaxCSNum, *CSI);
+ for (unsigned Idx : CallSiteIdxs) {
+ CallSiteNumToLPad[Idx].push_back(&BB);
+ MaxCSNum = std::max(MaxCSNum, Idx);
}
break;
}
@@ -14002,8 +14120,8 @@ static SDValue PerformANDCombine(SDNode *N,
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v4i1 ||
- VT == MVT::v8i1 || VT == MVT::v16i1)
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
+ VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
return SDValue();
APInt SplatBits, SplatUndef;
@@ -14298,8 +14416,8 @@ static SDValue PerformORCombine(SDNode *N,
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
- if (Subtarget->hasMVEIntegerOps() &&
- (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
+ if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
+ VT == MVT::v8i1 || VT == MVT::v16i1))
return PerformORCombine_i1(N, DAG, Subtarget);
APInt SplatBits, SplatUndef;
@@ -14569,6 +14687,15 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
return SDValue();
SDValue CSInc = Cmp->getOperand(0);
+
+ // Ignore any `And 1` nodes that may not yet have been removed. We are
+ // looking for a value that produces 1/0, so these have no effect on the
+ // code.
+ while (CSInc.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(CSInc.getOperand(1)) &&
+ CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
+ CSInc = CSInc.getOperand(0);
+
if (CSInc.getOpcode() != ARMISD::CSINC ||
!isNullConstant(CSInc.getOperand(0)) ||
!isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse())
@@ -17897,6 +18024,23 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (!VT.isInteger())
return SDValue();
+ // Fold away an unnecessary CMPZ/CMOV
+ // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
+ // if C1==EQ -> CMOV A, B, C2, $cpsr, D
+ // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
+ if (N->getConstantOperandVal(2) == ARMCC::EQ ||
+ N->getConstantOperandVal(2) == ARMCC::NE) {
+ ARMCC::CondCodes Cond;
+ if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
+ if (N->getConstantOperandVal(2) == ARMCC::NE)
+ Cond = ARMCC::getOppositeCondition(Cond);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
+ N->getOperand(1),
+ DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
+ N->getOperand(3), C);
+ }
+ }
+
// Materialize a boolean comparison for integers so we can avoid branching.
if (isNullConstant(FalseVal)) {
if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
@@ -18564,7 +18708,8 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
// These are for predicates
- if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) {
+ if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
+ Ty == MVT::v2i1)) {
if (Fast)
*Fast = true;
return true;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index e3b422358cae..1c5f8389f57c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -69,6 +69,7 @@ class VectorType;
CALL_PRED, // Function call that's predicable.
CALL_NOLINK, // Function call with branch not branch-and-link.
tSECALL, // CMSE non-secure function call.
+ t2CALL_BTI, // Thumb function call followed by BTI instruction.
BRCOND, // Conditional branch.
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index f53814a80e01..1ae0354ffc37 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -254,13 +254,6 @@ class MVEVectorVTInfo<ValueType vec, ValueType dblvec,
// An LLVM ValueType representing a corresponding vector of
// predicate bits, for use in ISel patterns that handle an IR
// intrinsic describing the predicated form of the instruction.
- //
- // Usually, for a vector of N things, this will be vNi1. But for
- // vectors of 2 values, we make an exception, and use v4i1 instead
- // of v2i1. Rationale: MVE codegen doesn't support doing all the
- // auxiliary operations on v2i1 (vector shuffles etc), and also,
- // there's no MVE compare instruction that will _generate_ v2i1
- // directly.
ValueType Pred = pred;
// Same as Pred but for DblVec rather than Vec.
@@ -294,25 +287,25 @@ class MVEVectorVTInfo<ValueType vec, ValueType dblvec,
// Integer vector types that don't treat signed and unsigned differently.
def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "i", ?>;
def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "i", ?>;
-def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "i", ?>;
-def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "i", ?>;
+def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v2i1, 0b10, "i", ?>;
+def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v2i1, ?, 0b11, "i", ?>;
// Explicitly signed and unsigned integer vectors. They map to the
// same set of LLVM ValueTypes as above, but are represented
// differently in assembly and instruction encodings.
def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "s", 0b0>;
def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "s", 0b0>;
-def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "s", 0b0>;
-def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "s", 0b0>;
+def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v2i1, 0b10, "s", 0b0>;
+def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v2i1, ?, 0b11, "s", 0b0>;
def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "u", 0b1>;
def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "u", 0b1>;
-def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "u", 0b1>;
-def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "u", 0b1>;
+def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v2i1, 0b10, "u", 0b1>;
+def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v2i1, ?, 0b11, "u", 0b1>;
// FP vector types.
def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, v4i1, 0b01, "f", ?>;
-def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, v4i1, 0b10, "f", ?>;
-def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>;
+def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, v2i1, 0b10, "f", ?>;
+def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v2i1, ?, 0b11, "f", ?>;
// Polynomial vector types.
def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>;
@@ -2260,6 +2253,31 @@ let Predicates = [HasMVEInt] in {
(v4i32 (ARMvmovImm (i32 1)))),
(i32 1))),
(MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
+
+ def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvdup (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvdup (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvdup (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvdup (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvdup (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
+ def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvdup (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
}
@@ -4450,6 +4468,11 @@ multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
(insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)),
(i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))),
VCCR))>;
+ def v2i1 : Pat<(v2i1 (opnode (v2i1 VCCR:$p1), (v2i1 VCCR:$p2))),
+ (v2i1 (COPY_TO_REGCLASS
+ (insn (i32 (COPY_TO_REGCLASS (v2i1 VCCR:$p1), rGPR)),
+ (i32 (COPY_TO_REGCLASS (v2i1 VCCR:$p2), rGPR))),
+ VCCR))>;
}
let Predicates = [HasMVEInt] in {
@@ -4469,20 +4492,20 @@ def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
}]>;
let Predicates = [HasMVEInt] in {
- foreach VT = [ v4i1, v8i1, v16i1 ] in {
+ foreach VT = [ v2i1, v4i1, v8i1, v16i1 ] in {
def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
(i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>;
def : Pat<(VT (predicate_cast (i32 VCCR:$src))),
(VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>;
- foreach VT2 = [ v4i1, v8i1, v16i1 ] in
+ foreach VT2 = [ v2i1, v4i1, v8i1, v16i1 ] in
def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
}
// If we happen to be casting from a load we can convert that straight
// into a predicate load, so long as the load is of the correct type.
- foreach VT = [ v4i1, v8i1, v16i1 ] in {
+ foreach VT = [ v2i1, v4i1, v8i1, v16i1 ] in {
def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))),
(VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>;
}
@@ -5350,33 +5373,40 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
}
multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- Intrinsic unpred_int, Intrinsic pred_int> {
+ Intrinsic unpred_int, Intrinsic pred_int, PatFrag add_op,
+ SDNode shift_op> {
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract, VTI.Size>;
defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
VTI, unpred_int, pred_int, 1, 1>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))), (i32 1))),
+ (Inst MQPR:$Qm, rGPR:$Rn)>;
+ }
}
-multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI> :
- MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd,
- int_arm_mve_hadd_predicated>;
+multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> :
+ MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd, int_arm_mve_hadd_predicated,
+ add_op, shift_op>;
-multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI> :
- MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub,
- int_arm_mve_hsub_predicated>;
+multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> :
+ MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub, int_arm_mve_hsub_predicated,
+ add_op, shift_op>;
-defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8>;
-defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16>;
-defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32>;
-defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8>;
-defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16>;
-defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32>;
+defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8, addnsw, ARMvshrsImm>;
+defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16, addnsw, ARMvshrsImm>;
+defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32, addnsw, ARMvshrsImm>;
+defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8, addnuw, ARMvshruImm>;
+defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16, addnuw, ARMvshruImm>;
+defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32, addnuw, ARMvshruImm>;
-defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8>;
-defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m<MVE_v8s16>;
-defm MVE_VHSUB_qr_s32 : MVE_VHSUB_qr_m<MVE_v4s32>;
-defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8>;
-defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
-defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
+defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8, subnsw, ARMvshrsImm>;
+defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m<MVE_v8s16, subnsw, ARMvshrsImm>;
+defm MVE_VHSUB_qr_s32 : MVE_VHSUB_qr_m<MVE_v4s32, subnsw, ARMvshrsImm>;
+defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8, subnuw, ARMvshruImm>;
+defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16, subnuw, ARMvshruImm>;
+defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32, subnuw, ARMvshruImm>;
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
@@ -6778,11 +6808,15 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
+ def : Pat<(v2i64 (vselect (v2i1 VCCR:$pred), (v2i64 MQPR:$v1), (v2i64 MQPR:$v2))),
+ (v2i64 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
(v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
+ def : Pat<(v2f64 (vselect (v2i1 VCCR:$pred), (v2f64 MQPR:$v1), (v2f64 MQPR:$v2))),
+ (v2f64 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
(v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
@@ -6808,6 +6842,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
+ def : Pat<(v2i64 (zext (v2i1 VCCR:$pred))),
+ (v2i64 (MVE_VPSEL (MVE_VMOVimmi64 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
@@ -6815,6 +6851,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
(v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
+ def : Pat<(v2i64 (sext (v2i1 VCCR:$pred))),
+ (v2i64 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
@@ -6822,6 +6860,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
+ def : Pat<(v2i64 (anyext (v2i1 VCCR:$pred))),
+ (v2i64 (MVE_VPSEL (MVE_VMOVimmi64 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
}
let Predicates = [HasMVEFloat] in {
@@ -6862,6 +6902,8 @@ def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
}
let Predicates = [HasMVEInt] in {
+ def : Pat<(v2i1 (xor (v2i1 VCCR:$pred), (v2i1 (predicate_cast (i32 65535))))),
+ (v2i1 (MVE_VPNOT (v2i1 VCCR:$pred)))>;
def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))),
(v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>;
def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))),
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 4471317f4ea4..6e8e61ca2b8e 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5736,3 +5736,10 @@ def t2BTI : PACBTIHintSpaceNoOpsInst<"bti", 0b00001111>;
def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> {
let hasSideEffects = 1;
}
+
+def ARMt2CallBTI : SDNode<"ARMISD::t2CALL_BTI", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def t2CALL_BTI : PseudoInst<(outs), (ins pred:$p, thumb_bl_target:$func),
+ IIC_Br, [(ARMt2CallBTI tglobaladdr:$func)]>,
+ Requires<[IsThumb2]>, Sched<[WriteBrL]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 9d1bfa414dff..dc5f1b92a6c2 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1076,6 +1076,9 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
}
}
+ def : InstAlias<!strconcat("vrint", opc, ".f16.f16\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"H") HPR:$Sd, HPR:$Sm), 0>,
+ Requires<[HasFullFP16]>;
def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
(!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm), 0>,
Requires<[HasFPARMv8]>;
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 3b10c60a0654..ef5fc12feb54 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2121,7 +2121,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
bool Modified = false;
for (MachineBasicBlock &MBB : Fn) {
Modified |= LoadStoreMultipleOpti(MBB);
- if (STI->hasV5TOps())
+ if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
Modified |= MergeReturnIntoLDM(MBB);
if (isThumb1)
Modified |= CombineMovBx(MBB);
@@ -2349,9 +2349,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
unsigned LastOpcode = 0;
unsigned LastBytes = 0;
unsigned NumMove = 0;
- for (int i = Ops.size() - 1; i >= 0; --i) {
+ for (MachineInstr *Op : llvm::reverse(Ops)) {
// Make sure each operation has the same kind.
- MachineInstr *Op = Ops[i];
unsigned LSMOpcode
= getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
if (LastOpcode && LSMOpcode != LastOpcode)
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 3874db5792d6..f822672c4477 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -251,10 +251,7 @@ namespace {
SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates;
if (Exclusive && Predicates.size() != 1)
return false;
- for (auto *PredMI : Predicates)
- if (isVCTP(PredMI))
- return true;
- return false;
+ return llvm::any_of(Predicates, isVCTP);
}
// Is the VPST, controlling the block entry, predicated upon a VCTP.
@@ -351,10 +348,7 @@ namespace {
}
bool containsVCTP() const {
- for (auto *MI : Insts)
- if (isVCTP(MI))
- return true;
- return false;
+ return llvm::any_of(Insts, isVCTP);
}
unsigned size() const { return Insts.size(); }
@@ -1334,8 +1328,8 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
bool Changed = false;
// Process inner loops first.
- for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
- Changed |= ProcessLoop(*I);
+ for (MachineLoop *L : *ML)
+ Changed |= ProcessLoop(L);
LLVM_DEBUG({
dbgs() << "ARM Loops: Processing loop containing:\n";
@@ -1699,7 +1693,7 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
// If any of the instructions between the VCMP and VPST are predicated
// then a different code path is expected to have merged the VCMP and
// VPST already.
- if (!std::any_of(++MachineBasicBlock::iterator(VCMP),
+ if (std::none_of(++MachineBasicBlock::iterator(VCMP),
MachineBasicBlock::iterator(VPST), hasVPRUse) &&
RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) &&
RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) {
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 4077fc058217..d8d937055d23 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -289,7 +289,7 @@ public:
return false;
if (SignReturnAddressAll)
return true;
- return LRSpilled;
+ return SpillsLR;
}
bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 760a5a5a20cf..194d65cad8d1 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -211,6 +211,8 @@ def FPCXTS : ARMReg<15, "fpcxts">;
def ZR : ARMReg<15, "zr">, DwarfRegNum<[15]>;
+def RA_AUTH_CODE : ARMReg<12, "ra_auth_code">, DwarfRegNum<[143]>;
+
// Register classes.
//
// pc == Program Counter
@@ -395,7 +397,7 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
}
// MVE Condition code register.
-def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1], 32, (add VPR)> {
+def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1, v2i1], 32, (add VPR)> {
// let CopyCost = -1; // Don't allow copying of status registers.
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index d51a888c951f..e61b90af31b0 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -18,6 +18,7 @@
#include "ARMConstantPoolValue.h"
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -534,6 +535,10 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
+ /// NoBTIAtReturnTwice - Don't place a BTI instruction after
+ /// return-twice constructs (setjmp)
+ bool NoBTIAtReturnTwice = false;
+
/// Options passed via command line that could influence the target
const TargetOptions &Options;
@@ -840,6 +845,8 @@ public:
/// to lr. This is always required on Thumb1-only targets, as the push and
/// pop instructions can't access the high registers.
bool splitFramePushPop(const MachineFunction &MF) const {
+ if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress())
+ return true;
return (getFramePointerReg() == ARM::R7 &&
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
isThumb1Only();
@@ -948,6 +955,8 @@ public:
bool hardenSlsRetBr() const { return HardenSlsRetBr; }
bool hardenSlsBlr() const { return HardenSlsBlr; }
bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
+
+ bool getNoBTIAtReturnTwice() const { return NoBTIAtReturnTwice; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 39f407ba7149..bfe078b06861 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -137,21 +137,18 @@ public:
int getFPReg() const { return FPReg; }
void emitFnStartLocNotes() const {
- for (Locs::const_iterator FI = FnStartLocs.begin(), FE = FnStartLocs.end();
- FI != FE; ++FI)
- Parser.Note(*FI, ".fnstart was specified here");
+ for (const SMLoc &Loc : FnStartLocs)
+ Parser.Note(Loc, ".fnstart was specified here");
}
void emitCantUnwindLocNotes() const {
- for (Locs::const_iterator UI = CantUnwindLocs.begin(),
- UE = CantUnwindLocs.end(); UI != UE; ++UI)
- Parser.Note(*UI, ".cantunwind was specified here");
+ for (const SMLoc &Loc : CantUnwindLocs)
+ Parser.Note(Loc, ".cantunwind was specified here");
}
void emitHandlerDataLocNotes() const {
- for (Locs::const_iterator HI = HandlerDataLocs.begin(),
- HE = HandlerDataLocs.end(); HI != HE; ++HI)
- Parser.Note(*HI, ".handlerdata was specified here");
+ for (const SMLoc &Loc : HandlerDataLocs)
+ Parser.Note(Loc, ".handlerdata was specified here");
}
void emitPersonalityLocNotes() const {
@@ -452,7 +449,8 @@ class ARMAsmParser : public MCTargetAsmParser {
int tryParseRegister();
bool tryParseRegisterWithWriteBack(OperandVector &);
int tryParseShiftRegister(OperandVector &);
- bool parseRegisterList(OperandVector &, bool EnforceOrder = true);
+ bool parseRegisterList(OperandVector &, bool EnforceOrder = true,
+ bool AllowRAAC = false);
bool parseMemory(OperandVector &);
bool parseOperand(OperandVector &, StringRef Mnemonic);
bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
@@ -2572,17 +2570,15 @@ public:
void addRegListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const SmallVectorImpl<unsigned> &RegList = getRegList();
- for (SmallVectorImpl<unsigned>::const_iterator
- I = RegList.begin(), E = RegList.end(); I != E; ++I)
- Inst.addOperand(MCOperand::createReg(*I));
+ for (unsigned Reg : RegList)
+ Inst.addOperand(MCOperand::createReg(Reg));
}
void addRegListWithAPSROperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const SmallVectorImpl<unsigned> &RegList = getRegList();
- for (SmallVectorImpl<unsigned>::const_iterator
- I = RegList.begin(), E = RegList.end(); I != E; ++I)
- Inst.addOperand(MCOperand::createReg(*I));
+ for (unsigned Reg : RegList)
+ Inst.addOperand(MCOperand::createReg(Reg));
}
void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
@@ -4464,8 +4460,8 @@ insertNoDuplicates(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
}
/// Parse a register list.
-bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
- bool EnforceOrder) {
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
+ bool AllowRAAC) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::LCurly))
return TokError("Token is not a Left Curly Brace");
@@ -4478,7 +4474,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
int Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
-
+ if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE)
+ return Error(RegLoc, "pseudo-register not allowed");
// The reglist instructions have at most 16 registers, so reserve
// space for that many.
int EReg = 0;
@@ -4492,7 +4489,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
++Reg;
}
const MCRegisterClass *RC;
- if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ if (Reg == ARM::RA_AUTH_CODE ||
+ ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
@@ -4513,11 +4511,15 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
+ if (Reg == ARM::RA_AUTH_CODE)
+ return Error(RegLoc, "pseudo-register not allowed");
Parser.Lex(); // Eat the minus.
SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
return Error(AfterMinusLoc, "register expected");
+ if (EndReg == ARM::RA_AUTH_CODE)
+ return Error(AfterMinusLoc, "pseudo-register not allowed");
// Allow Q regs and just interpret them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
EndReg = getDRegFromQReg(EndReg) + 1;
@@ -4526,7 +4528,9 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
if (Reg == EndReg)
continue;
// The register must be in the same register class as the first.
- if (!RC->contains(EndReg))
+ if ((Reg == ARM::RA_AUTH_CODE &&
+ RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) ||
+ (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg)))
return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
@@ -4551,13 +4555,15 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
+ if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE)
+ return Error(RegLoc, "pseudo-register not allowed");
// Allow Q regs and just interpret them as the two D sub-registers.
bool isQReg = false;
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
isQReg = true;
}
- if (!RC->contains(Reg) &&
+ if (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg) &&
RC->getID() == ARMMCRegisterClasses[ARM::GPRRegClassID].getID() &&
ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) {
// switch the register classes, as GPRwithAPSRnospRegClassID is a partial
@@ -4577,7 +4583,9 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
continue;
}
// The register must be in the same register class as the first.
- if (!RC->contains(Reg))
+ if ((Reg == ARM::RA_AUTH_CODE &&
+ RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) ||
+ (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg)))
return Error(RegLoc, "invalid register in register list");
// In most cases, the list must be monotonically increasing. An
// exception is CLRM, which is order-independent anyway, so
@@ -7106,13 +7114,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return Error(Loc, "too many conditions on VPT instruction");
}
unsigned Mask = 8;
- for (unsigned i = ITMask.size(); i != 0; --i) {
- char pos = ITMask[i - 1];
- if (pos != 't' && pos != 'e') {
+ for (char Pos : llvm::reverse(ITMask)) {
+ if (Pos != 't' && Pos != 'e') {
return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
}
Mask >>= 1;
- if (ITMask[i - 1] == 'e')
+ if (Pos == 'e')
Mask |= 8;
}
Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
@@ -11685,7 +11692,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
// Parse the register list
- if (parseRegisterList(Operands) ||
+ if (parseRegisterList(Operands, true, true) ||
parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
return true;
ARMOperand &Op = (ARMOperand &)*Operands[0];
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 896b104e8d97..e060e59e3759 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -1289,34 +1289,65 @@ void ARMELFStreamer::emitPad(int64_t Offset) {
PendingOffset -= Offset;
}
-void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool IsVector) {
- // Collect the registers in the register list
- unsigned Count = 0;
+static std::pair<unsigned, unsigned>
+collectHWRegs(const MCRegisterInfo &MRI, unsigned Idx,
+ const SmallVectorImpl<unsigned> &RegList, bool IsVector,
+ uint32_t &Mask_) {
uint32_t Mask = 0;
- const MCRegisterInfo *MRI = getContext().getRegisterInfo();
- for (size_t i = 0; i < RegList.size(); ++i) {
- unsigned Reg = MRI->getEncodingValue(RegList[i]);
+ unsigned Count = 0;
+ while (Idx > 0) {
+ unsigned Reg = RegList[Idx - 1];
+ if (Reg == ARM::RA_AUTH_CODE)
+ break;
+ Reg = MRI.getEncodingValue(Reg);
assert(Reg < (IsVector ? 32U : 16U) && "Register out of range");
unsigned Bit = (1u << Reg);
if ((Mask & Bit) == 0) {
Mask |= Bit;
++Count;
}
+ --Idx;
}
- // Track the change the $sp offset: For the .save directive, the
- // corresponding push instruction will decrease the $sp by (4 * Count).
- // For the .vsave directive, the corresponding vpush instruction will
- // decrease $sp by (8 * Count).
- SPOffset -= Count * (IsVector ? 8 : 4);
+ Mask_ = Mask;
+ return {Idx, Count};
+}
- // Emit the opcode
- FlushPendingOffset();
- if (IsVector)
- UnwindOpAsm.EmitVFPRegSave(Mask);
- else
- UnwindOpAsm.EmitRegSave(Mask);
+void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool IsVector) {
+ uint32_t Mask;
+ unsigned Idx, Count;
+ const MCRegisterInfo &MRI = *getContext().getRegisterInfo();
+
+ // Collect the registers in the register list. Issue unwinding instructions in
+ // three parts: ordinary hardware registers, return address authentication
+ // code pseudo register, the rest of the registers. The RA PAC is kept in an
+ // architectural register (usually r12), but we treat it as a special case in
+ // order to distinguish between that register containing RA PAC or a general
+ // value.
+ Idx = RegList.size();
+ while (Idx > 0) {
+ std::tie(Idx, Count) = collectHWRegs(MRI, Idx, RegList, IsVector, Mask);
+ if (Count) {
+ // Track the change to the $sp offset: For the .save directive, the
+ // corresponding push instruction will decrease the $sp by (4 * Count).
+ // For the .vsave directive, the corresponding vpush instruction will
+ // decrease $sp by (8 * Count).
+ SPOffset -= Count * (IsVector ? 8 : 4);
+
+ // Emit the opcode
+ FlushPendingOffset();
+ if (IsVector)
+ UnwindOpAsm.EmitVFPRegSave(Mask);
+ else
+ UnwindOpAsm.EmitRegSave(Mask);
+ } else if (Idx > 0 && RegList[Idx - 1] == ARM::RA_AUTH_CODE) {
+ --Idx;
+ SPOffset -= 4;
+ FlushPendingOffset();
+ UnwindOpAsm.EmitRegSave(0);
+ }
+ }
}
void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 781627c3c425..50f416b23db2 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -64,8 +64,11 @@ namespace {
} // end anonymous namespace
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
- if (RegSave == 0u)
+ if (RegSave == 0u) {
+ // A zero register mask is the special-case encoding for saving the
+ // return address authentication code (RA PAC) pseudo-register.
+ EmitInt8(ARM::EHABI::UNWIND_OPCODE_POP_RA_AUTH_CODE);
return;
+ }
// One byte opcode to save register r14 and r11-r4
if (RegSave & (1u << 4)) {
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index dc58b5427425..7e31ea77f4f5 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -366,7 +366,7 @@ bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
while (!Worklist.empty()) {
Register Reg = Worklist.pop_back_val();
for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
- if (count(ExpectedUsers, &MI))
+ if (llvm::is_contained(ExpectedUsers, &MI))
continue;
if (MI.getOpcode() != TargetOpcode::COPY ||
!MI.getOperand(0).getReg().isVirtual()) {
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index 6a5bc9284266..0e6960bce32b 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -213,7 +213,8 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
auto *TC = SE->getSCEV(TripCount);
int VectorWidth =
cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
- if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16)
+ if (VectorWidth != 2 && VectorWidth != 4 && VectorWidth != 8 &&
+ VectorWidth != 16)
return false;
ConstantInt *ConstElemCount = nullptr;
@@ -371,15 +372,10 @@ void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
switch (VectorWidth) {
default:
llvm_unreachable("unexpected number of lanes");
+ case 2: VCTPID = Intrinsic::arm_mve_vctp64; break;
case 4: VCTPID = Intrinsic::arm_mve_vctp32; break;
case 8: VCTPID = Intrinsic::arm_mve_vctp16; break;
case 16: VCTPID = Intrinsic::arm_mve_vctp8; break;
-
- // FIXME: vctp64 currently not supported because the predicate
- // vector wants to be <2 x i1>, but v2i1 is not a legal MVE
- // type, so problems happen at isel time.
- // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics
- // purposes, but takes a v4i1 instead of a v2i1.
}
Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
Value *VCTPCall = Builder.CreateCall(VCTP, Processed);
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 224c61b9f065..54e80a095dd4 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -824,8 +824,8 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
ARMRegSet CopyRegs; // Registers which can be used after pushing
// LoRegs for saving HiRegs.
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
+ for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ unsigned Reg = I.getReg();
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
LoRegsToSave[Reg] = true;
@@ -1021,8 +1021,7 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
bool NeedsPop = false;
- for (unsigned i = CSI.size(); i != 0; --i) {
- CalleeSavedInfo &Info = CSI[i-1];
+ for (CalleeSavedInfo &Info : llvm::reverse(CSI)) {
unsigned Reg = Info.getReg();
// High registers (excluding lr) have already been dealt with
@@ -1067,7 +1066,7 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
if (NeedsPop)
MBB.insert(MI, &*MIB);
else
- MF.DeleteMachineInstr(MIB);
+ MF.deleteMachineInstr(MIB);
return true;
}
diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index 672611ea2234..543d94875037 100644
--- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -247,8 +247,8 @@ bool AVRFrameLowering::spillCalleeSavedRegisters(
const TargetInstrInfo &TII = *STI.getInstrInfo();
AVRMachineFunctionInfo *AVRFI = MF.getInfo<AVRMachineFunctionInfo>();
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i - 1].getReg();
+ for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ unsigned Reg = I.getReg();
bool IsNotLiveIn = !MBB.isLiveIn(Reg);
assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 &&
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index 798d08393eae..51060018a5ca 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -571,8 +571,6 @@ void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// See lib/CodeGen/RegisterRelaxation.cpp for details.
// We end up here when a jump is too long for a RJMP instruction.
BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
-
- return;
}
} // end of namespace llvm
diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index 0348e2200acb..36237b2fc4fd 100644
--- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -93,8 +93,13 @@ static bool BPFPreserveDITypeImpl(Function &F) {
Ty = DTy->getBaseType();
}
- if (Ty->getName().empty())
- report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc");
+ if (Ty->getName().empty()) {
+ if (isa<DISubroutineType>(Ty))
+ report_fatal_error(
+ "SubroutineType not supported for BTF_TYPE_ID_REMOTE reloc");
+ else
+ report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc");
+ }
MD = Ty;
}
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index ebc04b40d428..29b99a84a6cd 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/CSKYMCTargetDesc.h"
#include "TargetInfo/CSKYTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCContext.h"
@@ -25,11 +26,24 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+using namespace llvm;
+
#define DEBUG_TYPE "csky-asm-parser"
-using namespace llvm;
+// Include the auto-generated portion of the compress emitter.
+#define GEN_COMPRESS_INSTR
+#include "CSKYGenCompressInstEmitter.inc"
+
+STATISTIC(CSKYNumInstrsCompressed,
+ "Number of C-SKY Compressed instructions emitted");
+
+static cl::opt<bool>
+ EnableCompressedInst("enable-csky-asm-compressed-inst", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable C-SKY asm compressed instruction"));
namespace {
struct CSKYOperand;
@@ -55,6 +69,10 @@ class CSKYAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID) override;
+ // Helper to actually emit an instruction to the MCStreamer. Also, when
+ // possible, compression of the instruction is performed.
+ void emitToStreamer(MCStreamer &S, const MCInst &Inst);
+
OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) override;
@@ -264,12 +282,6 @@ public:
bool isConstpool() const { return isConstPoolOp(); }
bool isDataSymbol() const { return isConstPoolOp(); }
- bool isSPOperand() const {
- if (!isReg())
- return false;
- return getReg() == CSKY::R14;
- }
-
bool isPSRFlag() const {
int64_t Imm;
// Must be of 'immediate' type and a constant.
@@ -755,10 +767,6 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "register is out of range");
}
- case Match_InvalidSPOperand: {
- SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
- return Error(ErrorLoc, "operand must be sp register");
- }
case Match_RequiresSameSrcAndDst: {
SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "src and dst operand must be same");
@@ -776,27 +784,62 @@ bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
OperandVector &Operands,
MCStreamer &Out) {
- if (Inst.getOpcode() == CSKY::LDQ32 || Inst.getOpcode() == CSKY::STQ32) {
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case CSKY::LDQ32:
+ case CSKY::STQ32:
if (Inst.getOperand(1).getReg() != CSKY::R4 ||
Inst.getOperand(2).getReg() != CSKY::R7) {
return Error(IDLoc, "Register sequence is not valid. 'r4-r7' expected");
}
Inst.setOpcode(Inst.getOpcode() == CSKY::LDQ32 ? CSKY::LDM32 : CSKY::STM32);
- Out.emitInstruction(Inst, getSTI());
- return false;
- } else if (Inst.getOpcode() == CSKY::SEXT32 ||
- Inst.getOpcode() == CSKY::ZEXT32) {
+ break;
+ case CSKY::SEXT32:
+ case CSKY::ZEXT32:
if (Inst.getOperand(2).getImm() < Inst.getOperand(3).getImm())
return Error(IDLoc, "msb must be greater or equal to lsb");
- } else if (Inst.getOpcode() == CSKY::INS32) {
+ break;
+ case CSKY::INS32:
if (Inst.getOperand(3).getImm() < Inst.getOperand(4).getImm())
return Error(IDLoc, "msb must be greater or equal to lsb");
- } else if (Inst.getOpcode() == CSKY::IDLY32) {
+ break;
+ case CSKY::IDLY32:
if (Inst.getOperand(0).getImm() > 32 || Inst.getOperand(0).getImm() < 0)
return Error(IDLoc, "n must be in range [0,32]");
+ break;
+ case CSKY::ADDC32:
+ case CSKY::SUBC32:
+ case CSKY::ADDC16:
+ case CSKY::SUBC16:
+ Inst.erase(std::next(Inst.begin()));
+ Inst.erase(std::prev(Inst.end()));
+ Inst.insert(std::next(Inst.begin()), MCOperand::createReg(CSKY::C));
+ Inst.insert(Inst.end(), MCOperand::createReg(CSKY::C));
+ break;
+ case CSKY::CMPNEI32:
+ case CSKY::CMPNEI16:
+ case CSKY::CMPNE32:
+ case CSKY::CMPNE16:
+ case CSKY::CMPHSI32:
+ case CSKY::CMPHSI16:
+ case CSKY::CMPHS32:
+ case CSKY::CMPHS16:
+ case CSKY::CMPLTI32:
+ case CSKY::CMPLTI16:
+ case CSKY::CMPLT32:
+ case CSKY::CMPLT16:
+ case CSKY::BTSTI32:
+ Inst.erase(Inst.begin());
+ Inst.insert(Inst.begin(), MCOperand::createReg(CSKY::C));
+ break;
+ case CSKY::MVCV32:
+ Inst.erase(std::next(Inst.begin()));
+ Inst.insert(Inst.end(), MCOperand::createReg(CSKY::C));
+ break;
}
- Out.emitInstruction(Inst, getSTI());
+ emitToStreamer(Out, Inst);
return false;
}
@@ -1422,6 +1465,16 @@ OperandMatchResultTy CSKYAsmParser::tryParseRegister(unsigned &RegNo,
bool CSKYAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
+void CSKYAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
+ MCInst CInst;
+ bool Res = false;
+ if (EnableCompressedInst)
+ Res = compressInst(CInst, Inst, getSTI(), S.getContext());
+ if (Res)
+ ++CSKYNumInstrsCompressed;
+ S.emitInstruction((Res ? CInst : Inst), getSTI());
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYAsmParser() {
RegisterMCAsmParser<CSKYAsmParser> X(getTheCSKYTarget());
}
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
index 1c38c5d1fde6..85129f78e726 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -30,6 +30,9 @@ using namespace llvm;
#define DEBUG_TYPE "csky-asm-printer"
+STATISTIC(CSKYNumInstrsCompressed,
+ "Number of C-SKY Compressed instructions emitted");
+
CSKYAsmPrinter::CSKYAsmPrinter(llvm::TargetMachine &TM,
std::unique_ptr<llvm::MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this) {}
@@ -39,6 +42,16 @@ bool CSKYAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return AsmPrinter::runOnMachineFunction(MF);
}
+#define GEN_COMPRESS_INSTR
+#include "CSKYGenCompressInstEmitter.inc"
+void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
+ MCInst CInst;
+ bool Res = compressInst(CInst, Inst, *Subtarget, OutStreamer->getContext());
+ if (Res)
+ ++CSKYNumInstrsCompressed;
+ AsmPrinter::EmitToStreamer(*OutStreamer, Res ? CInst : Inst);
+}
+
// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.
#include "CSKYGenMCPseudoLowering.inc"
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
index f0f5d8657c04..b30311e0ca64 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -26,6 +26,8 @@ public:
StringRef getPassName() const override { return "CSKY Assembly Printer"; }
+ void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
+
/// tblgen'erated driver function for lowering simple MI->MC
/// pseudo instructions.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
diff --git a/llvm/lib/Target/CSKY/CSKYCallingConv.td b/llvm/lib/Target/CSKY/CSKYCallingConv.td
index 87e2e6b9dc31..91102e3714df 100644
--- a/llvm/lib/Target/CSKY/CSKYCallingConv.td
+++ b/llvm/lib/Target/CSKY/CSKYCallingConv.td
@@ -79,4 +79,4 @@ def RetCC_CSKY_ABIV2_FP : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R0, R1]>>,
CCIfType<[f32], CCAssignToReg<[F0_32]>>,
CCIfType<[f64], CCAssignToReg<[F0_64]>>
-]>; \ No newline at end of file
+]>;
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
index 9b22c95cfe21..3a8ee5713584 100644
--- a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
@@ -54,4 +54,4 @@ void CSKYFrameLowering::emitPrologue(MachineFunction &MF,
void CSKYFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
// FIXME: Implement this when we have function calls
-} \ No newline at end of file
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
index fc9ef8bfd9d9..8dc91904b8cc 100644
--- a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -40,6 +40,8 @@ public:
}
void Select(SDNode *N) override;
+ bool selectAddCarry(SDNode *N);
+ bool selectSubCarry(SDNode *N);
#include "CSKYGenDAGISel.inc"
};
@@ -60,7 +62,12 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
switch (Opcode) {
default:
break;
- // FIXME: Add selection nodes needed later.
+ case ISD::ADDCARRY:
+ IsSelected = selectAddCarry(N);
+ break;
+ case ISD::SUBCARRY:
+ IsSelected = selectSubCarry(N);
+ break;
}
if (IsSelected)
@@ -70,6 +77,86 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
SelectCode(N);
}
+bool CSKYDAGToDAGISel::selectAddCarry(SDNode *N) {
+ MachineSDNode *NewNode = nullptr;
+ auto Type0 = N->getValueType(0);
+ auto Type1 = N->getValueType(1);
+ auto Op0 = N->getOperand(0);
+ auto Op1 = N->getOperand(1);
+ auto Op2 = N->getOperand(2);
+
+ SDLoc Dl(N);
+
+ if (isNullConstant(Op2)) {
+ auto *CA = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::CLRC32 : CSKY::CLRC16, Dl, Type1);
+ NewNode = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::ADDC32 : CSKY::ADDC16, Dl, {Type0, Type1},
+ {Op0, Op1, SDValue(CA, 0)});
+ } else if (isOneConstant(Op2)) {
+ auto *CA = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::SETC32 : CSKY::SETC16, Dl, Type1);
+ NewNode = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::ADDC32 : CSKY::ADDC16, Dl, {Type0, Type1},
+ {Op0, Op1, SDValue(CA, 0)});
+ } else {
+ NewNode = CurDAG->getMachineNode(Subtarget->has2E3() ? CSKY::ADDC32
+ : CSKY::ADDC16,
+ Dl, {Type0, Type1}, {Op0, Op1, Op2});
+ }
+ ReplaceNode(N, NewNode);
+ return true;
+}
+
+static SDValue InvertCarryFlag(const CSKYSubtarget *Subtarget,
+ SelectionDAG *DAG, SDLoc Dl, SDValue OldCarry) {
+ auto NewCarryReg =
+ DAG->getMachineNode(Subtarget->has2E3() ? CSKY::MVCV32 : CSKY::MVCV16, Dl,
+ MVT::i32, OldCarry);
+ auto NewCarry =
+ DAG->getMachineNode(Subtarget->hasE2() ? CSKY::BTSTI32 : CSKY::BTSTI16,
+ Dl, OldCarry.getValueType(), SDValue(NewCarryReg, 0),
+ DAG->getTargetConstant(0, Dl, MVT::i32));
+ return SDValue(NewCarry, 0);
+}
+
+bool CSKYDAGToDAGISel::selectSubCarry(SDNode *N) {
+ MachineSDNode *NewNode = nullptr;
+ auto Type0 = N->getValueType(0);
+ auto Type1 = N->getValueType(1);
+ auto Op0 = N->getOperand(0);
+ auto Op1 = N->getOperand(1);
+ auto Op2 = N->getOperand(2);
+
+ SDLoc Dl(N);
+
+ if (isNullConstant(Op2)) {
+ auto *CA = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::SETC32 : CSKY::SETC16, Dl, Type1);
+ NewNode = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::SUBC32 : CSKY::SUBC16, Dl, {Type0, Type1},
+ {Op0, Op1, SDValue(CA, 0)});
+ } else if (isOneConstant(Op2)) {
+ auto *CA = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::CLRC32 : CSKY::CLRC16, Dl, Type1);
+ NewNode = CurDAG->getMachineNode(
+ Subtarget->has2E3() ? CSKY::SUBC32 : CSKY::SUBC16, Dl, {Type0, Type1},
+ {Op0, Op1, SDValue(CA, 0)});
+ } else {
+ auto CarryIn = InvertCarryFlag(Subtarget, CurDAG, Dl, Op2);
+ NewNode = CurDAG->getMachineNode(Subtarget->has2E3() ? CSKY::SUBC32
+ : CSKY::SUBC16,
+ Dl, {Type0, Type1}, {Op0, Op1, CarryIn});
+ }
+ auto CarryOut = InvertCarryFlag(Subtarget, CurDAG, Dl, SDValue(NewNode, 1));
+
+ ReplaceUses(SDValue(N, 0), SDValue(NewNode, 0));
+ ReplaceUses(SDValue(N, 1), CarryOut);
+ CurDAG->RemoveDeadNode(N);
+
+ return true;
+}
+
FunctionPass *llvm::createCSKYISelDag(CSKYTargetMachine &TM) {
return new CSKYDAGToDAGISel(TM);
}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index ac6d069e592c..a1f7cc685d4c 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -37,6 +37,46 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
// Register Class
addRegisterClass(MVT::i32, &CSKY::GPRRegClass);
+ setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
+ setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i32, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i1, Promote);
+
+ if (!Subtarget.hasE2()) {
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ }
+
+ if (!Subtarget.has2E3()) {
+ setOperationAction(ISD::ABS, MVT::i32, Expand);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
+
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td
index 6d42bddcdd78..ea0761d97545 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td
@@ -88,6 +88,19 @@ class R16_XZ_UNOP<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
let Inst{1, 0} = sop;
}
+class R16_Z_UNOP<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rx), !strconcat(opstr, "\t$rz"),
+ []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let Constraints = "$rz = $rx";
+}
+
class R16_XY_CMP<bits<2> sop, string opstr> : CSKY16Inst<
AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx, sGPR:$ry), !strconcat(opstr, "\t$rx, $ry"),
[]> {
@@ -146,7 +159,7 @@ class I16_X_CMP<bits<3> sop, string opstr, Operand Immoperand> : CSKY16Inst<
}
class I16_SP_IMM7<bits<3> sop, string opstr> : CSKY16Inst<
- AddrModeNone, (outs SPOp:$sp2), (ins SPOp:$sp1, uimm7_2:$imm7),
+ AddrModeNone, (outs GPRSP:$sp2), (ins GPRSP:$sp1, uimm7_2:$imm7),
!strconcat(opstr, "\t$sp2, $sp1, $imm7"), []> {
bits<7> imm7;
let Inst{15, 14} = 0b00;
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index e12235cf9478..6fcb136cd99b 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -11,6 +11,8 @@
//===----------------------------------------------------------------------===//
#include "CSKYInstrInfo.h"
+#include "CSKYMachineFunctionInfo.h"
+#include "CSKYTargetMachine.h"
#include "llvm/MC/MCContext.h"
#define DEBUG_TYPE "csky-instr-info"
@@ -23,3 +25,289 @@ using namespace llvm;
CSKYInstrInfo::CSKYInstrInfo(CSKYSubtarget &STI)
: CSKYGenInstrInfo(CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), STI(STI) {
}
+
+Register CSKYInstrInfo::movImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, int64_t Val,
+ MachineInstr::MIFlag Flag) const {
+ assert(isUInt<32>(Val) && "should be uint32");
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ Register DstReg;
+ if (STI.hasE2()) {
+ DstReg = MRI.createVirtualRegister(&CSKY::GPRRegClass);
+
+ if (isUInt<16>(Val)) {
+ BuildMI(MBB, MBBI, DL, get(CSKY::MOVI32), DstReg)
+ .addImm(Val & 0xFFFF)
+ .setMIFlags(Flag);
+ } else if (isShiftedUInt<16, 16>(Val)) {
+ BuildMI(MBB, MBBI, DL, get(CSKY::MOVIH32), DstReg)
+ .addImm((Val >> 16) & 0xFFFF)
+ .setMIFlags(Flag);
+ } else {
+ BuildMI(MBB, MBBI, DL, get(CSKY::MOVIH32), DstReg)
+ .addImm((Val >> 16) & 0xFFFF)
+ .setMIFlags(Flag);
+ BuildMI(MBB, MBBI, DL, get(CSKY::ORI32), DstReg)
+ .addReg(DstReg)
+ .addImm(Val & 0xFFFF)
+ .setMIFlags(Flag);
+ }
+
+ } else {
+ DstReg = MRI.createVirtualRegister(&CSKY::mGPRRegClass);
+ if (isUInt<8>(Val)) {
+ BuildMI(MBB, MBBI, DL, get(CSKY::MOVI16), DstReg)
+ .addImm(Val & 0xFF)
+ .setMIFlags(Flag);
+ } else if (isUInt<16>(Val)) {
+ BuildMI(MBB, MBBI, DL, get(CSKY::MOVI16), DstReg)
+ .addImm((Val >> 8) & 0xFF)
+ .setMIFlags(Flag);
+ BuildMI(MBB, MBBI, DL, get(CSKY::LSLI16), DstReg)
+ .addReg(DstReg)
+ .addImm(8)
+ .setMIFlags(Flag);
+ if ((Val & 0xFF) != 0)
+ BuildMI(MBB, MBBI, DL, get(CSKY::ADDI16), DstReg)
+ .addReg(DstReg)
+ .addImm(Val & 0xFF)
+ .setMIFlags(Flag);
+ } else if (isUInt<24>(Val)) {
+ BuildMI(MBB, MBBI, DL, get(CSKY::MOVI16), DstReg)
+ .addImm((Val >> 16) & 0xFF)
+ .setMIFlags(Flag);
+ BuildMI(MBB, MBBI, DL, get(CSKY::LSLI16), DstReg)
+ .addReg(DstReg)
+ .addImm(8)
+ .setMIFlags(Flag);
+ if (((Val >> 8) & 0xFF) != 0)
+ BuildMI(MBB, MBBI, DL, get(CSKY::ADDI16), DstReg)
+ .addReg(DstReg)
+ .addImm((Val >> 8) & 0xFF)
+ .setMIFlags(Flag);
+ BuildMI(MBB, MBBI, DL, get(CSKY::LSLI16), DstReg)
+ .addReg(DstReg)
+ .addImm(8)
+ .setMIFlags(Flag);
+ if ((Val & 0xFF) != 0)
+ BuildMI(MBB, MBBI, DL, get(CSKY::ADDI16), DstReg)
+ .addReg(DstReg)
+ .addImm(Val & 0xFF)
+ .setMIFlags(Flag);
+ } else {
+ BuildMI(MBB, MBBI, DL, get(CSKY::MOVI16), DstReg)
+ .addImm((Val >> 24) & 0xFF)
+ .setMIFlags(Flag);
+ BuildMI(MBB, MBBI, DL, get(CSKY::LSLI16), DstReg)
+ .addReg(DstReg)
+ .addImm(8)
+ .setMIFlags(Flag);
+ if (((Val >> 16) & 0xFF) != 0)
+ BuildMI(MBB, MBBI, DL, get(CSKY::ADDI16), DstReg)
+ .addReg(DstReg)
+ .addImm((Val >> 16) & 0xFF)
+ .setMIFlags(Flag);
+ BuildMI(MBB, MBBI, DL, get(CSKY::LSLI16), DstReg)
+ .addReg(DstReg)
+ .addImm(8)
+ .setMIFlags(Flag);
+ if (((Val >> 8) & 0xFF) != 0)
+ BuildMI(MBB, MBBI, DL, get(CSKY::ADDI16), DstReg)
+ .addReg(DstReg)
+ .addImm((Val >> 8) & 0xFF)
+ .setMIFlags(Flag);
+ BuildMI(MBB, MBBI, DL, get(CSKY::LSLI16), DstReg)
+ .addReg(DstReg)
+ .addImm(8)
+ .setMIFlags(Flag);
+ if ((Val & 0xFF) != 0)
+ BuildMI(MBB, MBBI, DL, get(CSKY::ADDI16), DstReg)
+ .addReg(DstReg)
+ .addImm(Val & 0xFF)
+ .setMIFlags(Flag);
+ }
+ }
+
+ return DstReg;
+}
+
+unsigned CSKYInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ switch (MI.getOpcode()) {
+ default:
+ return 0;
+ case CSKY::LD16B:
+ case CSKY::LD16H:
+ case CSKY::LD16W:
+ case CSKY::LD32B:
+ case CSKY::LD32BS:
+ case CSKY::LD32H:
+ case CSKY::LD32HS:
+ case CSKY::LD32W:
+ case CSKY::RESTORE_CARRY:
+ break;
+ }
+
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+
+ return 0;
+}
+
+unsigned CSKYInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ switch (MI.getOpcode()) {
+ default:
+ return 0;
+ case CSKY::ST16B:
+ case CSKY::ST16H:
+ case CSKY::ST16W:
+ case CSKY::ST32B:
+ case CSKY::ST32H:
+ case CSKY::ST32W:
+ case CSKY::SPILL_CARRY:
+ break;
+ }
+
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+
+ return 0;
+}
+
+void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register SrcReg, bool IsKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ unsigned Opcode = 0;
+
+ if (CSKY::GPRRegClass.hasSubClassEq(RC)) {
+ Opcode = CSKY::ST32W; // Optimize for 16bit
+ } else if (CSKY::CARRYRegClass.hasSubClassEq(RC)) {
+ Opcode = CSKY::SPILL_CARRY;
+ CFI->setSpillsCR();
+ } else {
+ llvm_unreachable("Unknown RegisterClass");
+ }
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+ BuildMI(MBB, I, DL, get(Opcode))
+ .addReg(SrcReg, getKillRegState(IsKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+void CSKYInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ unsigned Opcode = 0;
+
+ if (CSKY::GPRRegClass.hasSubClassEq(RC)) {
+ Opcode = CSKY::LD32W;
+ } else if (CSKY::CARRYRegClass.hasSubClassEq(RC)) {
+ Opcode = CSKY::RESTORE_CARRY;
+ CFI->setSpillsCR();
+ } else {
+ llvm_unreachable("Unknown RegisterClass");
+ }
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+ BuildMI(MBB, I, DL, get(Opcode), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ if (CSKY::GPRRegClass.contains(SrcReg) &&
+ CSKY::CARRYRegClass.contains(DestReg)) {
+ if (STI.hasE2()) {
+ BuildMI(MBB, I, DL, get(CSKY::BTSTI32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ } else {
+ assert(SrcReg < CSKY::R8);
+ BuildMI(MBB, I, DL, get(CSKY::BTSTI16), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ }
+ return;
+ }
+
+ if (CSKY::CARRYRegClass.contains(SrcReg) &&
+ CSKY::GPRRegClass.contains(DestReg)) {
+
+ if (STI.hasE2()) {
+ BuildMI(MBB, I, DL, get(CSKY::MVC32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ assert(DestReg < CSKY::R16);
+ assert(DestReg < CSKY::R8);
+ BuildMI(MBB, I, DL, get(CSKY::MOVI16), DestReg).addImm(0);
+ BuildMI(MBB, I, DL, get(CSKY::ADDC16))
+ .addReg(DestReg, RegState::Define)
+ .addReg(SrcReg, RegState::Define)
+ .addReg(DestReg, getKillRegState(true))
+ .addReg(DestReg, getKillRegState(true))
+ .addReg(SrcReg, getKillRegState(true));
+ BuildMI(MBB, I, DL, get(CSKY::BTSTI16))
+ .addReg(SrcReg, RegState::Define | getDeadRegState(KillSrc))
+ .addReg(DestReg)
+ .addImm(0);
+ }
+ return;
+ }
+
+ unsigned Opcode = 0;
+ if (CSKY::GPRRegClass.contains(DestReg, SrcReg))
+ Opcode = CSKY::MOV32;
+ else {
+ LLVM_DEBUG(dbgs() << "src = " << SrcReg << ", dst = " << DestReg);
+ LLVM_DEBUG(I->dump());
+ llvm_unreachable("Unknown RegisterClass");
+ }
+
+ BuildMI(MBB, I, DL, get(Opcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
index 04be9da27b57..450641d96b74 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -29,6 +29,31 @@ protected:
public:
explicit CSKYInstrInfo(CSKYSubtarget &STI);
+
+ unsigned isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, Register SrcReg,
+ bool IsKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, Register DestReg,
+ int FrameIndex, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
+ bool KillSrc) const override;
+
+ // Materializes the given integer Val into DstReg.
+ Register movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, int64_t Val,
+ MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 9dda3159e446..30d9206eec68 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -52,6 +52,11 @@ class OImmAsmOperand<int width, string suffix = "">
: ImmAsmOperand<"O", width, suffix> {
}
+def to_tframeindex : SDNodeXForm<frameindex, [{
+ auto FI = cast<FrameIndexSDNode>(N);
+ return CurDAG->getTargetFrameIndex(FI->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout()));
+}]>;
+
class oimm<int num> : Operand<i32>,
ImmLeaf<i32, "return isUInt<"#num#">(Imm - 1);"> {
let EncoderMethod = "getOImmOpValue";
@@ -166,9 +171,23 @@ def bare_symbol : Operand<iPTR> {
let OperandType = "OPERAND_PCREL";
}
-def oimm3 : oimm<3>;
+def oimm3 : oimm<3> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<3>(Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def oimm4 : oimm<4>;
-def oimm5 : oimm<5>;
+def oimm5 : oimm<5> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<5>(Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def oimm6 : oimm<6>;
def imm5_idly : Operand<i32>, ImmLeaf<i32,
@@ -177,9 +196,30 @@ def imm5_idly : Operand<i32>, ImmLeaf<i32,
let DecoderMethod = "decodeOImmOperand<5>";
}
-def oimm8 : oimm<8>;
-def oimm12 : oimm<12>;
-def oimm16 : oimm<16>;
+def oimm8 : oimm<8> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<8>(Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def oimm12 : oimm<12> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<12>(Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def oimm16 : oimm<16> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<16>(Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def nimm12 : nimm<12>;
@@ -195,28 +235,98 @@ def uimm2_jmpix : Operand<i32>,
def uimm3 : uimm<3>;
def uimm4 : uimm<4>;
-def uimm5 : uimm<5>;
+def uimm5 : uimm<5> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<5, 0>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def uimm5_msb_size : uimm<5> {
let EncoderMethod = "getImmOpValueMSBSize";
}
-def uimm5_1 : uimm<5, 1>;
-def uimm5_2 : uimm<5, 2>;
+def uimm5_1 : uimm<5, 1> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<5, 1>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def uimm5_2 : uimm<5, 2> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<5, 2>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def uimm6 : uimm<6>;
def uimm7 : uimm<7>;
def uimm7_1 : uimm<7, 1>;
-def uimm7_2 : uimm<7, 2>;
+def uimm7_2 : uimm<7, 2>{
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<7, 2>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def uimm7_3 : uimm<7, 3>;
-def uimm8 : uimm<8>;
-def uimm8_2 : uimm<8, 2>;
+def uimm8 : uimm<8> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<8, 0>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def uimm8_2 : uimm<8, 2> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<8, 2>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def uimm8_3 : uimm<8, 3>;
def uimm8_8 : uimm<8, 8>;
def uimm8_16 : uimm<8, 16>;
def uimm8_24 : uimm<8, 24>;
-def uimm12 : uimm<12>;
-def uimm12_1 : uimm<12, 1>;
-def uimm12_2 : uimm<12, 2>;
-def uimm16 : uimm<16>;
+def uimm12 : uimm<12> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<12, 0>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def uimm12_1 : uimm<12, 1> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<12, 1>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def uimm12_2 : uimm<12, 2> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<12, 2>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def uimm16 : uimm<16> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedUInt<16, 0>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
def uimm16_8 : uimm<16, 8>;
def uimm16_16 : uimm<16, 16>;
def uimm20 : uimm<20>;
@@ -642,11 +752,6 @@ def BSR32_BR : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>{
let Defs = [ R15 ];
}
-let Predicates = [iHasE2], isCodeGenOnly = 1 in {
- def RTS32 : I_16_RET<0x6, 0xF, "rts32", [(CSKY_RET)]>;
-}
-
-
//===----------------------------------------------------------------------===//
// Symbol address instructions.
//===----------------------------------------------------------------------===//
@@ -872,6 +977,102 @@ def TRAP32 : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins uimm2:$imm2), "trap32 $
}
+//===----------------------------------------------------------------------===//
+// Instruction Patterns.
+//===----------------------------------------------------------------------===//
+
+// Load & Store Patterns
+multiclass LdPat<PatFrag LoadOp, ImmLeaf imm_type, Instruction Inst, ValueType Type> {
+ def : Pat<(Type (LoadOp GPR:$rs1)), (Inst GPR:$rs1, 0)>;
+ def : Pat<(Type (LoadOp (i32 frameindex:$rs1))), (Inst (i32 (to_tframeindex tframeindex:$rs1)), 0)>;
+ def : Pat<(Type (LoadOp (add GPR:$rs1, imm_type:$uimm))),
+ (Inst GPR:$rs1, imm_type:$uimm)>;
+ def : Pat<(Type (LoadOp (add frameindex:$rs1, imm_type:$uimm))),
+ (Inst (i32 (to_tframeindex tframeindex:$rs1)), imm_type:$uimm)>;
+ def : Pat<(Type (LoadOp (eqToAdd frameindex:$rs1, imm_type:$uimm))),
+ (Inst (i32 (to_tframeindex tframeindex:$rs1)), imm_type:$uimm)>;
+ def : Pat<(Type (LoadOp (add GPR:$rs1, tglobaladdr:$gd))),
+ (Inst GPR:$rs1, tglobaladdr:$gd)>;
+}
+
+defm : LdPat<extloadi8, uimm12, LD32B, i32>;
+defm : LdPat<zextloadi8, uimm12, LD32B, i32>;
+let Predicates = [iHasE2] in {
+ defm : LdPat<sextloadi8, uimm12, LD32BS, i32>;
+}
+defm : LdPat<extloadi16, uimm12_1, LD32H, i32>;
+defm : LdPat<zextloadi16, uimm12_1, LD32H, i32>;
+let Predicates = [iHasE2] in {
+defm : LdPat<sextloadi16, uimm12_1, LD32HS, i32>;
+}
+defm : LdPat<load, uimm12_2, LD32W, i32>;
+
+multiclass LdrPat<PatFrag LoadOp, Instruction Inst, ValueType Type> {
+ def : Pat<(Type (LoadOp (add GPR:$rs1, GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2, 0)>;
+ def : Pat<(Type (LoadOp (add GPR:$rs1, (shl GPR:$rs2, (i32 1))))), (Inst GPR:$rs1, GPR:$rs2, 1)>;
+ def : Pat<(Type (LoadOp (add GPR:$rs1, (shl GPR:$rs2, (i32 2))))), (Inst GPR:$rs1, GPR:$rs2, 2)>;
+ def : Pat<(Type (LoadOp (add GPR:$rs1, (shl GPR:$rs2, (i32 3))))), (Inst GPR:$rs1, GPR:$rs2, 3)>;
+}
+
+let Predicates = [iHas2E3] in {
+ defm : LdrPat<zextloadi8, LDR32B, i32>;
+ defm : LdrPat<sextloadi8, LDR32BS, i32>;
+ defm : LdrPat<extloadi8, LDR32BS, i32>;
+ defm : LdrPat<zextloadi16, LDR32H, i32>;
+ defm : LdrPat<sextloadi16, LDR32HS, i32>;
+ defm : LdrPat<extloadi16, LDR32HS, i32>;
+ defm : LdrPat<load, LDR32W, i32>;
+}
+
+multiclass StPat<PatFrag StoreOp, ValueType Type, ImmLeaf imm_type, Instruction Inst> {
+ def : Pat<(StoreOp Type:$rs2, GPR:$rs1), (Inst Type:$rs2, GPR:$rs1, 0)>;
+ def : Pat<(StoreOp Type:$rs2, frameindex:$rs1), (Inst Type:$rs2, (i32 (to_tframeindex tframeindex:$rs1)), 0)>;
+ def : Pat<(StoreOp Type:$rs2, (add GPR:$rs1, imm_type:$uimm12)),
+ (Inst Type:$rs2, GPR:$rs1, imm_type:$uimm12)>;
+ def : Pat<(StoreOp Type:$rs2, (add frameindex:$rs1, imm_type:$uimm12)),
+ (Inst Type:$rs2, (i32 (to_tframeindex tframeindex:$rs1)), imm_type:$uimm12)>;
+ def : Pat<(StoreOp Type:$rs2, (eqToAdd frameindex:$rs1, imm_type:$uimm12)),
+ (Inst Type:$rs2, (i32 (to_tframeindex tframeindex:$rs1)), imm_type:$uimm12)>;
+}
+
+defm : StPat<truncstorei8, i32, uimm12, ST32B>;
+defm : StPat<truncstorei16, i32, uimm12_1, ST32H>;
+defm : StPat<store, i32, uimm12_2, ST32W>;
+
+multiclass StrPat<PatFrag StoreOp, ValueType Type, Instruction Inst> {
+ def : Pat<(StoreOp Type:$rz, (add GPR:$rs1, GPR:$rs2)), (Inst Type:$rz, GPR:$rs1, GPR:$rs2, 0)>;
+ def : Pat<(StoreOp Type:$rz, (add GPR:$rs1, (shl GPR:$rs2, (i32 1)))), (Inst Type:$rz, GPR:$rs1, GPR:$rs2, 1)>;
+ def : Pat<(StoreOp Type:$rz, (add GPR:$rs1, (shl GPR:$rs2, (i32 2)))), (Inst Type:$rz, GPR:$rs1, GPR:$rs2, 2)>;
+ def : Pat<(StoreOp Type:$rz, (add GPR:$rs1, (shl GPR:$rs2, (i32 3)))), (Inst Type:$rz, GPR:$rs1, GPR:$rs2, 3)>;
+}
+
+let Predicates = [iHas2E3] in {
+ defm : StrPat<truncstorei8, i32, STR32B>;
+ defm : StrPat<truncstorei16, i32, STR32H>;
+ defm : StrPat<store, i32, STR32W>;
+
+ // Sext & Zext Patterns
+ def : Pat<(sext_inreg GPR:$src, i1), (SEXT32 GPR:$src, 0, 0)>;
+ def : Pat<(and GPR:$src, 255), (ZEXT32 GPR:$src, 7, 0)>;
+ def : Pat<(and GPR:$src, 65535), (ZEXT32 GPR:$src, 15, 0)>;
+}
+
+// Constant materialize patterns.
+let Predicates = [iHasE2] in
+ def : Pat<(i32 imm:$imm),
+ (ORI32 (MOVIH32 (uimm32_hi16 imm:$imm)), (uimm32_lo16 imm:$imm))>;
+
+
+// Other operations.
+let Predicates = [iHasE2] in {
+ def : Pat<(rotl GPR:$rs1, GPR:$rs2),
+ (ROTL32 GPR:$rs1, (ANDI32 GPR:$rs2, 0x1f))>;
+ let Predicates = [iHas2E3] in {
+ def : Pat<(bitreverse GPR:$rx), (BREV32 GPR:$rx)>;
+ def : Pat<(bswap GPR:$rx), (REVB32 GPR:$rx)>;
+ }
+ def : Pat<(i32 (ctlz GPR:$rx)), (FF1 GPR:$rx)>;
+}
//===----------------------------------------------------------------------===//
// Pseudo for assembly
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
index c98f43622155..6a9dd03dfa1d 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
@@ -33,16 +33,6 @@ def br_symbol_16bit : Operand<iPTR> {
let OperandType = "OPERAND_PCREL";
}
-def SPOperand : AsmOperandClass {
- let Name = "SPOperand";
- let RenderMethod = "addRegOperands";
- let DiagnosticType = !strconcat("Invalid", Name);
-}
-
-def SPOp : RegisterOperand<GPR> {
- let ParserMatchClass = SPOperand;
-}
-
def constpool_symbol_16bit : Operand<iPTR> {
let ParserMatchClass = Constpool;
let EncoderMethod =
@@ -83,7 +73,7 @@ let isCommutable = 1 in {
def XOR16 : R16_XZ_BINOP<0b1011, 0b01, "xor16", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
def NOR16 : R16_XZ_BINOP<0b1011, 0b10, "nor16", BinOpFrag<(not (or node:$LHS, node:$RHS))>>;
let isCodeGenOnly = 1 in
- def NOT16 : R16_XZ_UNOP<0b1011, 0b10, "not16">;
+ def NOT16 : R16_Z_UNOP<0b1011, 0b10, "not16">;
def MULT16 : R16_XZ_BINOP<0b1111, 0b00, "mult16", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
}
def SUBU16XZ : R16_XZ_BINOP<0b1000, 0b10, "subu16", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
@@ -108,7 +98,7 @@ let Constraints = "$rZ = $rz", isReMaterializable = 1, isAsCheapAsAMove = 1 in {
}
let isAdd = 1 in
-def ADDI16ZSP : I16_Z_8<0b011, (ins SPOp:$sp, uimm8_2:$imm8),
+def ADDI16ZSP : I16_Z_8<0b011, (ins GPRSP:$sp, uimm8_2:$imm8),
"addi16\t$rz, $sp, $imm8">;
let isAdd = 1 in
@@ -150,9 +140,9 @@ def ST16W : I16_XZ_LDST<AddrMode16W, 0b110, "st16.w",
(outs), (ins mGPR:$rz, mGPR:$rx, uimm5_2:$imm)>;
def LD16WSP : I16_ZSP_LDST<AddrMode16W, 0b011, "ld16.w",
- (outs mGPR:$rz), (ins SPOp:$sp, uimm8_2:$addr)>;
+ (outs mGPR:$rz), (ins GPRSP:$sp, uimm8_2:$addr)>;
def ST16WSP : I16_ZSP_LDST<AddrMode16W, 0b111, "st16.w",
- (outs), (ins mGPR:$rz, SPOp:$sp, uimm8_2:$addr)>;
+ (outs), (ins mGPR:$rz, GPRSP:$sp, uimm8_2:$addr)>;
//===----------------------------------------------------------------------===//
// Compare instructions.
@@ -450,3 +440,150 @@ def JBF16 : JBranchPseudo<(outs),
let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
def PseudoLRW16 : CSKYPseudo<(outs mGPR:$rz),
(ins bare_symbol:$src), "lrw16 $rz, $src", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Compress Instruction tablegen backend.
+//===----------------------------------------------------------------------===//
+
+def : CompressPat<(ADDU32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (ADDU16XZ sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(ADDU32 sGPR:$rd, sGPR:$rs1, sGPR:$rd),
+ (ADDU16XZ sGPR:$rd, sGPR:$rs1)>;
+def : CompressPat<(ADDU32 mGPR:$rd, mGPR:$rs1, mGPR:$rs2),
+ (ADDU16 mGPR:$rd, mGPR:$rs1, mGPR:$rs2)>;
+def : CompressPat<(SUBU32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (SUBU16XZ sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(SUBU32 mGPR:$rd, mGPR:$rs1, mGPR:$rs2),
+ (SUBU16 mGPR:$rd, mGPR:$rs1, mGPR:$rs2)>;
+
+def : CompressPat<
+ (ADDC32 sGPR:$rd, CARRY:$cout, sGPR:$rd, sGPR:$rs2, CARRY:$cout),
+ (ADDC16 sGPR:$rd, CARRY:$cout, sGPR:$rs2, CARRY:$cout)
+ >;
+def : CompressPat<
+ (SUBC32 sGPR:$rd, CARRY:$cout, sGPR:$rd, sGPR:$rs2, CARRY:$cout),
+ (SUBC16 sGPR:$rd, CARRY:$cout, sGPR:$rs2, CARRY:$cout)
+ >;
+
+def : CompressPat<(ADDI32 mGPR:$rd, mGPR:$rs, oimm3:$imm),
+ (ADDI16XZ mGPR:$rd, mGPR:$rs, oimm3:$imm)>;
+def : CompressPat<(SUBI32 mGPR:$rd, mGPR:$rs, oimm3:$imm),
+ (SUBI16XZ mGPR:$rd, mGPR:$rs, oimm3:$imm)>;
+
+def : CompressPat<(ADDI32 mGPR:$rd, mGPR:$rd, oimm8:$imm),
+ (ADDI16 mGPR:$rd, oimm8:$imm)>;
+def : CompressPat<(SUBI32 mGPR:$rd, mGPR:$rd, oimm8:$imm),
+ (SUBI16 mGPR:$rd, oimm8:$imm)>;
+
+def : CompressPat<(ADDI32 GPRSP:$sp, GPRSP:$sp, uimm7_2:$imm),
+ (ADDI16SPSP GPRSP:$sp, GPRSP:$sp, uimm7_2:$imm)>;
+def : CompressPat<(SUBI32 GPRSP:$sp, GPRSP:$sp, uimm7_2:$imm),
+ (SUBI16SPSP GPRSP:$sp, GPRSP:$sp, uimm7_2:$imm)>;
+
+def : CompressPat<(ADDI32 mGPR:$rd, GPRSP:$sp, uimm8_2:$imm),
+ (ADDI16ZSP mGPR:$rd, GPRSP:$sp, uimm8_2:$imm)>;
+
+def : CompressPat<(MULT32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (MULT16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(MULT32 sGPR:$rd, sGPR:$rs1, sGPR:$rd),
+ (MULT16 sGPR:$rd, sGPR:$rs1)>;
+def : CompressPat<(AND32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (AND16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(AND32 sGPR:$rd, sGPR:$rs1, sGPR:$rd),
+ (AND16 sGPR:$rd, sGPR:$rs1)>;
+def : CompressPat<(OR32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (OR16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(OR32 sGPR:$rd, sGPR:$rs1, sGPR:$rd),
+ (OR16 sGPR:$rd, sGPR:$rs1)>;
+def : CompressPat<(XOR32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (XOR16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(XOR32 sGPR:$rd, sGPR:$rs1, sGPR:$rd),
+ (XOR16 sGPR:$rd, sGPR:$rs1)>;
+
+def : CompressPat<(ANDN32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (ANDN16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(NOR32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (NOR16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(LSL32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (LSL16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(LSR32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (LSR16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(ASR32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (ASR16 sGPR:$rd, sGPR:$rs2)>;
+def : CompressPat<(ROTL32 sGPR:$rd, sGPR:$rd, sGPR:$rs2),
+ (ROTL16 sGPR:$rd, sGPR:$rs2)>;
+
+def : CompressPat<(NOT32 sGPR:$rd, sGPR:$rd),
+ (NOT16 sGPR:$rd)>;
+
+let Predicates = [iHas2E3] in
+def : CompressPat<(REVB32 sGPR:$rd, sGPR:$rs),
+ (REVB16 sGPR:$rd, sGPR:$rs)>;
+
+def : CompressPat<(LSLI32 mGPR:$rd, mGPR:$rs, uimm5:$imm),
+ (LSLI16 mGPR:$rd, mGPR:$rs, uimm5:$imm)>;
+def : CompressPat<(LSRI32 mGPR:$rd, mGPR:$rs, uimm5:$imm),
+ (LSRI16 mGPR:$rd, mGPR:$rs, uimm5:$imm)>;
+def : CompressPat<(ASRI32 mGPR:$rd, mGPR:$rs, uimm5:$imm),
+ (ASRI16 mGPR:$rd, mGPR:$rs, uimm5:$imm)>;
+
+def : CompressPat<(CMPHS32 CARRY:$ca, sGPR:$rs1, sGPR:$rs2),
+ (CMPHS16 CARRY:$ca, sGPR:$rs1, sGPR:$rs2)>;
+def : CompressPat<(CMPLT32 CARRY:$ca, sGPR:$rs1, sGPR:$rs2),
+ (CMPLT16 CARRY:$ca, sGPR:$rs1, sGPR:$rs2)>;
+def : CompressPat<(CMPNE32 CARRY:$ca, sGPR:$rs1, sGPR:$rs2),
+ (CMPNE16 CARRY:$ca, sGPR:$rs1, sGPR:$rs2)>;
+
+def : CompressPat<(CMPHSI32 CARRY:$ca, mGPR:$rs, oimm5:$imm),
+ (CMPHSI16 CARRY:$ca, mGPR:$rs, oimm5:$imm)>;
+def : CompressPat<(CMPLTI32 CARRY:$ca, mGPR:$rs, oimm5:$imm),
+ (CMPLTI16 CARRY:$ca, mGPR:$rs, oimm5:$imm)>;
+def : CompressPat<(CMPNEI32 CARRY:$ca, mGPR:$rs, uimm5:$imm),
+ (CMPNEI16 CARRY:$ca, mGPR:$rs, uimm5:$imm)>;
+
+def : CompressPat<(JSR32 sGPR:$rd),
+ (JSR16 sGPR:$rd)>;
+
+
+def : CompressPat<(MVCV32 sGPR:$rd, CARRY:$ca),
+ (MVCV16 sGPR:$rd, CARRY:$ca)>;
+def : CompressPat<(MOV32 sGPR:$rd, sGPR:$ca),
+ (MOV16 sGPR:$rd, sGPR:$ca)>;
+def : CompressPat<(MOVI32 mGPR:$rd, uimm8:$imm),
+ (MOVI16 mGPR:$rd, uimm8:$imm)>;
+
+def : CompressPat<(LD32B mGPR:$rd, mGPR:$rs, uimm5:$imm),
+ (LD16B mGPR:$rd, mGPR:$rs, uimm5:$imm)>;
+def : CompressPat<(LD32H mGPR:$rd, mGPR:$rs, uimm5_1:$imm),
+ (LD16H mGPR:$rd, mGPR:$rs, uimm5_1:$imm)>;
+def : CompressPat<(LD32W mGPR:$rd, mGPR:$rs, uimm5_2:$imm),
+ (LD16W mGPR:$rd, mGPR:$rs, uimm5_2:$imm)>;
+def : CompressPat<(LD32W mGPR:$rd, GPRSP:$sp, uimm8_2:$imm),
+ (LD16WSP mGPR:$rd, GPRSP:$sp, uimm8_2:$imm)>;
+
+def : CompressPat<(ST32B mGPR:$rd, mGPR:$rs, uimm5:$imm),
+ (ST16B mGPR:$rd, mGPR:$rs, uimm5:$imm)>;
+def : CompressPat<(ST32H mGPR:$rd, mGPR:$rs, uimm5_1:$imm),
+ (ST16H mGPR:$rd, mGPR:$rs, uimm5_1:$imm)>;
+def : CompressPat<(ST32W mGPR:$rd, mGPR:$rs, uimm5_2:$imm),
+ (ST16W mGPR:$rd, mGPR:$rs, uimm5_2:$imm)>;
+def : CompressPat<(ST32W mGPR:$rd, GPRSP:$sp, uimm8_2:$imm),
+ (ST16WSP mGPR:$rd, GPRSP:$sp, uimm8_2:$imm)>;
+
+let Predicates = [HasBTST16] in
+def : CompressPat<(BTSTI32 CARRY:$ca, mGPR:$rs, uimm5:$imm),
+ (BTSTI16 CARRY:$ca, mGPR:$rs, uimm5:$imm)>;
+def : CompressPat<(BCLRI32 mGPR:$rd, mGPR:$rd, uimm5:$imm),
+ (BCLRI16 mGPR:$rd, uimm5:$imm)>;
+def : CompressPat<(BSETI32 mGPR:$rd, mGPR:$rd, uimm5:$imm),
+ (BSETI16 mGPR:$rd, uimm5:$imm)>;
+
+def : CompressPat<(ZEXTB32 sGPR:$rd, sGPR:$rs),
+ (ZEXTB16 sGPR:$rd, sGPR:$rs)>;
+def : CompressPat<(ZEXTH32 sGPR:$rd, sGPR:$rs),
+ (ZEXTH16 sGPR:$rd, sGPR:$rs)>;
+def : CompressPat<(SEXTB32 sGPR:$rd, sGPR:$rs),
+ (SEXTB16 sGPR:$rd, sGPR:$rs)>;
+def : CompressPat<(SEXTH32 sGPR:$rd, sGPR:$rs),
+ (SEXTH16 sGPR:$rd, sGPR:$rs)>;
diff --git a/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp b/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp
index c42a56bfb04e..7e0b9bcd7549 100644
--- a/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp
+++ b/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp
@@ -114,4 +114,4 @@ bool CSKYMCInstLower::lowerOperand(const MachineOperand &MO,
break;
}
return true;
-} \ No newline at end of file
+}
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
index a1d45fea534b..57b6ae3c27b5 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
@@ -88,8 +88,187 @@ CSKYRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_I32_SaveList;
}
+static bool IsLegalOffset(const CSKYInstrInfo *TII, MachineInstr *MI,
+ int &Offset) {
+ const MCInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & CSKYII::AddrModeMask);
+ unsigned i = 0;
+ for (; !MI->getOperand(i).isFI(); ++i) {
+ assert(i + 1 < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ if (MI->getOpcode() == CSKY::ADDI32) {
+ if (!isUInt<12>(std::abs(Offset) - 1))
+ return false;
+ if (Offset < 0) {
+ MI->setDesc(TII->get(CSKY::SUBI32));
+ Offset = -Offset;
+ }
+
+ return true;
+ }
+
+ if (MI->getOpcode() == CSKY::ADDI16XZ)
+ return false;
+
+ if (Offset < 0)
+ return false;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case CSKYII::AddrMode32B:
+ Scale = 1;
+ NumBits = 12;
+ break;
+ case CSKYII::AddrMode32H:
+ Scale = 2;
+ NumBits = 12;
+ break;
+ case CSKYII::AddrMode32WD:
+ Scale = 4;
+ NumBits = 12;
+ break;
+ case CSKYII::AddrMode16B:
+ Scale = 1;
+ NumBits = 5;
+ break;
+ case CSKYII::AddrMode16H:
+ Scale = 2;
+ NumBits = 5;
+ break;
+ case CSKYII::AddrMode16W:
+ Scale = 4;
+ NumBits = 5;
+ break;
+ case CSKYII::AddrMode32SDF:
+ Scale = 4;
+ NumBits = 8;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ }
+
+ // Cannot encode offset.
+ if ((Offset & (Scale - 1)) != 0)
+ return false;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ // Offset out of range.
+ return false;
+}
+
void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
-} \ No newline at end of file
+
+ MachineInstr *MI = &*II;
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const CSKYInstrInfo *TII = MF.getSubtarget<CSKYSubtarget>().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ const CSKYSubtarget &STI = MF.getSubtarget<CSKYSubtarget>();
+
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case CSKY::RESTORE_CARRY: {
+ Register NewReg = STI.hasE2()
+ ? MRI.createVirtualRegister(&CSKY::GPRRegClass)
+ : MRI.createVirtualRegister(&CSKY::mGPRRegClass);
+
+ auto *Temp = BuildMI(MBB, II, DL, TII->get(CSKY::LD32W), NewReg)
+ .add(MI->getOperand(1))
+ .add(MI->getOperand(2))
+ .getInstr();
+
+ BuildMI(MBB, II, DL, TII->get(STI.hasE2() ? CSKY::BTSTI32 : CSKY::BTSTI16),
+ MI->getOperand(0).getReg())
+ .addReg(NewReg, getKillRegState(true))
+ .addImm(0);
+
+ MI = Temp;
+
+ MBB.erase(II);
+ break;
+ }
+ case CSKY::SPILL_CARRY: {
+ Register NewReg;
+ if (STI.hasE2()) {
+ NewReg = MRI.createVirtualRegister(&CSKY::GPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(CSKY::MVC32), NewReg)
+ .add(MI->getOperand(0));
+ } else {
+ NewReg = MRI.createVirtualRegister(&CSKY::mGPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(CSKY::MOVI16), NewReg).addImm(0);
+ BuildMI(MBB, II, DL, TII->get(CSKY::ADDC16))
+ .addReg(NewReg, RegState::Define)
+ .addReg(MI->getOperand(0).getReg(), RegState::Define)
+ .addReg(NewReg, getKillRegState(true))
+ .addReg(NewReg, getKillRegState(true))
+ .addReg(MI->getOperand(0).getReg());
+
+ BuildMI(MBB, II, DL, TII->get(CSKY::BTSTI16), MI->getOperand(0).getReg())
+ .addReg(NewReg)
+ .addImm(0);
+ }
+
+ MI = BuildMI(MBB, II, DL, TII->get(CSKY::ST32W))
+ .addReg(NewReg, getKillRegState(true))
+ .add(MI->getOperand(1))
+ .add(MI->getOperand(2))
+ .getInstr();
+
+ MBB.erase(II);
+
+ break;
+ }
+ }
+
+ int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
+ Register FrameReg;
+ int Offset = getFrameLowering(MF)
+ ->getFrameIndexReference(MF, FrameIndex, FrameReg)
+ .getFixed() +
+ MI->getOperand(FIOperandNum + 1).getImm();
+
+ if (!isInt<32>(Offset))
+ report_fatal_error(
+ "Frame offsets outside of the signed 32-bit range not supported");
+
+ bool FrameRegIsKill = false;
+ MachineBasicBlock::iterator NewII(MI);
+ if (!IsLegalOffset(TII, MI, Offset)) {
+ assert(isInt<32>(Offset) && "Int32 expected");
+ // The offset won't fit in an immediate, so use a scratch register instead
+ // Modify Offset and FrameReg appropriately
+ assert(Offset >= 0);
+ Register ScratchReg = TII->movImm(MBB, NewII, DL, Offset);
+ BuildMI(MBB, NewII, DL,
+ TII->get(STI.hasE2() ? CSKY::ADDU32 : CSKY::ADDU16XZ), ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(FrameReg);
+
+ Offset = 0;
+ FrameReg = ScratchReg;
+ FrameRegIsKill = true;
+ }
+
+ if (Offset == 0 &&
+ (MI->getOpcode() == CSKY::ADDI32 || MI->getOpcode() == CSKY::ADDI16XZ)) {
+ MI->setDesc(TII->get(TargetOpcode::COPY));
+ MI->getOperand(FIOperandNum)
+ .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
+ MI->RemoveOperand(FIOperandNum + 1);
+ } else {
+ MI->getOperand(FIOperandNum)
+ .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
+ MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ }
+}
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.h b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
index 779ea6493c7e..5b3b62ec0db2 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
@@ -38,6 +38,18 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override {
+ return false;
+ }
};
} // namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
index 7548c22bb2c5..ade5c7f795af 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
@@ -168,6 +168,11 @@ def mGPR : RegisterClass<"CSKY", [i32], 32,
let Size = 32;
}
+// Register class for SP only.
+def GPRSP : RegisterClass<"CSKY", [i32], 32, (add R14)> {
+ let Size = 32;
+}
+
def GPRPair : RegisterClass<"CSKY", [untyped], 32, (add GPRTuple)> {
let Size = 64;
}
diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td
index 7518fd774a48..ae811b30434d 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/llvm/lib/Target/Hexagon/Hexagon.td
@@ -29,6 +29,8 @@ def ProcTinyCore: SubtargetFeature<"tinycore", "HexagonProcFamily",
// Hexagon ISA Extensions
def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true",
"Hexagon ZReg extension instructions">;
+def ExtensionHVXQFloat: SubtargetFeature<"hvx-qfloat", "UseHVXQFloatOps",
+ "true", "Hexagon HVX QFloating point instructions">;
def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion",
"Hexagon::ArchEnum::V60", "Hexagon HVX instructions">;
@@ -52,6 +54,10 @@ def ExtensionHVXV68: SubtargetFeature<"hvxv68", "HexagonHVXVersion",
"Hexagon::ArchEnum::V68", "Hexagon HVX instructions",
[ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
ExtensionHVXV67]>;
+def ExtensionHVXV69: SubtargetFeature<"hvxv69", "HexagonHVXVersion",
+ "Hexagon::ArchEnum::V69", "Hexagon HVX instructions",
+ [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
+ ExtensionHVXV67, ExtensionHVXV68]>;
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
@@ -61,6 +67,9 @@ def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps",
def ExtensionAudio: SubtargetFeature<"audio", "UseAudioOps", "true",
"Hexagon Audio extension instructions">;
+def ExtensionHVXIEEEFP: SubtargetFeature<"hvx-ieee-fp", "UseHVXIEEEFPOps",
+ "true", "Hexagon HVX IEEE floating point instructions">;
+
def FeatureCompound: SubtargetFeature<"compound", "UseCompound", "true",
"Use compound instructions">;
def FeaturePackets: SubtargetFeature<"packets", "UsePackets", "true",
@@ -88,6 +97,8 @@ def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
"NoreturnStackElim", "true",
"Eliminate stack allocation in a noreturn function when possible">;
+def FeatureCabac: SubtargetFeature<"cabac", "UseCabac", "false",
+ "Emit the CABAC instruction">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
@@ -112,6 +123,8 @@ def UseHVXV67 : Predicate<"HST->useHVXV67Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV67)>;
def UseHVXV68 : Predicate<"HST->useHVXV68Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV68)>;
+def UseHVXV69 : Predicate<"HST->useHVXV69Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV69)>;
def UseAudio : Predicate<"HST->useAudioOps()">,
AssemblerPredicate<(all_of ExtensionAudio)>;
def UseZReg : Predicate<"HST->useZRegOps()">,
@@ -119,6 +132,11 @@ def UseZReg : Predicate<"HST->useZRegOps()">,
def UseCompound : Predicate<"HST->useCompound()">;
def HasPreV65 : Predicate<"HST->hasPreV65()">,
AssemblerPredicate<(all_of FeaturePreV65)>;
+def UseHVXIEEEFP : Predicate<"HST->useHVXIEEEFPOps()">,
+ AssemblerPredicate<(all_of ExtensionHVXIEEEFP)>;
+def UseHVXQFloat : Predicate<"HST->useHVXQFloatOps()">,
+ AssemblerPredicate<(all_of ExtensionHVXQFloat)>;
+def UseHVXFloatingPoint: Predicate<"HST->useHVXFloatingPoint()">;
def HasMemNoShuf : Predicate<"HST->hasMemNoShuf()">,
AssemblerPredicate<(all_of FeatureMemNoShuf)>;
def UseUnsafeMath : Predicate<"HST->useUnsafeMath()">;
@@ -127,6 +145,8 @@ def NotOptTinyCore : Predicate<"!HST->isTinyCore() ||"
let RecomputePerFunction = 1;
}
def UseSmallData : Predicate<"HST->useSmallData()">;
+def UseCabac : Predicate<"HST->useCabac()">,
+ AssemblerPredicate<(any_of FeatureCabac)>;
def Hvx64: HwMode<"+hvx-length64b">;
def Hvx128: HwMode<"+hvx-length128b">;
@@ -299,7 +319,7 @@ def changeAddrMode_rr_ur: InstrMapping {
let ValueCols = [["BaseLongOffset"]];
}
-def changeAddrMode_ur_rr : InstrMapping {
+def changeAddrMode_ur_rr: InstrMapping {
let FilterClass = "ImmRegShl";
let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
let ColFields = ["addrMode"];
@@ -370,40 +390,55 @@ class Proc<string Name, SchedMachineModel Model,
def : Proc<"generic", HexagonModelV60,
[ArchV5, ArchV55, ArchV60,
FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv5", HexagonModelV5,
[ArchV5,
FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv55", HexagonModelV55,
[ArchV5, ArchV55,
FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv60", HexagonModelV60,
[ArchV5, ArchV55, ArchV60,
FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv62", HexagonModelV62,
[ArchV5, ArchV55, ArchV60, ArchV62,
FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv65", HexagonModelV65,
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv66", HexagonModelV66,
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66,
FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv67", HexagonModelV67,
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
def : Proc<"hexagonv68", HexagonModelV68,
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
ArchV68,
FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
- FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
+def : Proc<"hexagonv69", HexagonModelV69,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+ ArchV68, ArchV69,
+ FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+ FeatureCabac]>;
// Need to update the correct features for tiny core.
// Disable NewValueJumps since the packetizer is unable to handle a packet with
// a new value jump and another SLOT0 instruction.
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 8e6a01e3a186..411078052e0f 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -773,6 +773,67 @@ void HexagonAsmPrinter::emitInstruction(const MachineInstr *MI) {
OutStreamer->emitInstruction(MCB, getSubtargetInfo());
}
+void HexagonAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
+ static const int8_t NoopsInSledCount = 4;
+ // We want to emit the following pattern:
+ //
+ // .L_xray_sled_N:
+ // <xray_sled_base>:
+ // { jump .Ltmp0 }
+ // { nop
+ // nop
+ // nop
+ // nop }
+ // .Ltmp0:
+ //
+ // We need the 4 nop words because at runtime, we'd be patching over the
+ // full 5 words with the following pattern:
+ //
+ // <xray_sled_n>:
+ // { immext(#...) // upper 26-bits of trampoline
+ // r6 = ##... // lower 6-bits of trampoline
+ // immext(#...) // upper 26-bits of func id
+ // r7 = ##... } // lower 6 bits of func id
+ // { callr r6 }
+ //
+ //
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->emitLabel(CurSled);
+
+ MCInst *SledJump = new (OutContext) MCInst();
+ SledJump->setOpcode(Hexagon::J2_jump);
+ auto PostSled = OutContext.createTempSymbol();
+ SledJump->addOperand(MCOperand::createExpr(HexagonMCExpr::create(
+ MCSymbolRefExpr::create(PostSled, OutContext), OutContext)));
+
+ // Emit "jump PostSled" instruction, which jumps over the nop series.
+ MCInst SledJumpPacket;
+ SledJumpPacket.setOpcode(Hexagon::BUNDLE);
+ SledJumpPacket.addOperand(MCOperand::createImm(0));
+ SledJumpPacket.addOperand(MCOperand::createInst(SledJump));
+
+ EmitToStreamer(*OutStreamer, SledJumpPacket);
+
+ // FIXME: this will emit individual packets, we should
+ // special-case this and combine them into a single packet.
+ emitNops(NoopsInSledCount);
+
+ OutStreamer->emitLabel(PostSled);
+ recordSled(CurSled, MI, Kind, 0);
+}
+
+void HexagonAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void HexagonAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void HexagonAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::TAIL_CALL);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonAsmPrinter() {
RegisterAsmPrinter<HexagonAsmPrinter> X(getTheHexagonTarget());
}
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 3932def87854..93d5f1dce7af 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -36,7 +36,11 @@ class TargetMachine;
bool runOnMachineFunction(MachineFunction &Fn) override {
Subtarget = &Fn.getSubtarget<HexagonSubtarget>();
- return AsmPrinter::runOnMachineFunction(Fn);
+ const bool Modified = AsmPrinter::runOnMachineFunction(Fn);
+ // Emit the XRay table for this function.
+ emitXRayTable();
+
+ return Modified;
}
StringRef getPassName() const override {
@@ -47,6 +51,16 @@ class TargetMachine;
const override;
void emitInstruction(const MachineInstr *MI) override;
+
+ //===------------------------------------------------------------------===//
+ // XRay implementation
+ //===------------------------------------------------------------------===//
+ // XRay-specific lowering for Hexagon.
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ void EmitSled(const MachineInstr &MI, SledKind Kind);
+
void HexagonProcessInstruction(MCInst &Inst, const MachineInstr &MBB);
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 2c5ad3b589d2..428d25da6dbc 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -995,8 +995,8 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
MachineBasicBlock *B = N->getBlock();
std::vector<MachineInstr*> Instrs;
- for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
- Instrs.push_back(&*I);
+ for (MachineInstr &MI : llvm::reverse(*B))
+ Instrs.push_back(&MI);
for (auto MI : Instrs) {
unsigned Opc = MI->getOpcode();
@@ -3084,8 +3084,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
.addMBB(&LB);
RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
- for (unsigned i = G.Ins.size(); i > 0; --i) {
- const MachineInstr *SI = G.Ins[i-1];
+ for (const MachineInstr *SI : llvm::reverse(G.Ins)) {
unsigned DR = getDefReg(SI);
const TargetRegisterClass *RC = MRI->getRegClass(DR);
Register NewDR = MRI->createVirtualRegister(RC);
@@ -3156,20 +3155,20 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
// if that instruction could potentially be moved to the front of the loop:
// the output of the loop cannot be used in a non-shuffling instruction
// in this loop.
- for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
- if (I->isTerminator())
+ for (MachineInstr &MI : llvm::reverse(*C.LB)) {
+ if (MI.isTerminator())
continue;
- if (I->isPHI())
+ if (MI.isPHI())
break;
RegisterSet Defs;
- HBS::getInstrDefs(*I, Defs);
+ HBS::getInstrDefs(MI, Defs);
if (Defs.count() != 1)
continue;
Register DefR = Defs.find_first();
if (!DefR.isVirtual())
continue;
- if (!isBitShuffle(&*I, DefR))
+ if (!isBitShuffle(&MI, DefR))
continue;
bool BadUse = false;
@@ -3183,8 +3182,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
if (UseI->getOperand(Idx+1).getMBB() != C.LB)
BadUse = true;
} else {
- auto F = find(ShufIns, UseI);
- if (F == ShufIns.end())
+ if (!llvm::is_contained(ShufIns, UseI))
BadUse = true;
}
} else {
@@ -3199,7 +3197,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
if (BadUse)
continue;
- ShufIns.push_back(&*I);
+ ShufIns.push_back(&MI);
}
// Partition the list of shuffling instructions into instruction groups,
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 8c3b9572201e..a53efeb96961 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -1256,15 +1256,11 @@ void HexagonCommonGEP::removeDeadCode() {
BO.push_back(DTN->getBlock());
}
- for (unsigned i = BO.size(); i > 0; --i) {
- BasicBlock *B = cast<BasicBlock>(BO[i-1]);
- BasicBlock::InstListType &IL = B->getInstList();
-
- using reverse_iterator = BasicBlock::InstListType::reverse_iterator;
-
+ for (Value *V : llvm::reverse(BO)) {
+ BasicBlock *B = cast<BasicBlock>(V);
ValueVect Ins;
- for (reverse_iterator I = IL.rbegin(), E = IL.rend(); I != E; ++I)
- Ins.push_back(&*I);
+ for (Instruction &I : llvm::reverse(*B))
+ Ins.push_back(&I);
for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) {
Instruction *In = cast<Instruction>(*I);
if (isInstructionTriviallyDead(In))
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h
index 7a43a4440b2d..56174dc7e136 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -21,31 +21,32 @@
namespace llvm {
namespace Hexagon {
-enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68 };
+enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 };
-static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68};
+static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68, 69};
static constexpr ArrayRef<unsigned> ArchValsNum(ArchValsNumArray);
-static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68" };
+static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68", "v69" };
static constexpr ArrayRef<StringLiteral> ArchValsText(ArchValsTextArray);
-static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68" };
+static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68", "hexagonv69" };
static constexpr ArrayRef<StringLiteral> CpuValsText(CpuValsTextArray);
-static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68" };
+static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68", "v69" };
static constexpr ArrayRef<StringLiteral> CpuNickText(CpuNickTextArray);
static const std::map<std::string, ArchEnum> CpuTable{
- {"generic", Hexagon::ArchEnum::V5},
- {"hexagonv5", Hexagon::ArchEnum::V5},
- {"hexagonv55", Hexagon::ArchEnum::V55},
- {"hexagonv60", Hexagon::ArchEnum::V60},
- {"hexagonv62", Hexagon::ArchEnum::V62},
- {"hexagonv65", Hexagon::ArchEnum::V65},
- {"hexagonv66", Hexagon::ArchEnum::V66},
- {"hexagonv67", Hexagon::ArchEnum::V67},
- {"hexagonv67t", Hexagon::ArchEnum::V67},
- {"hexagonv68", Hexagon::ArchEnum::V68},
+ {"generic", Hexagon::ArchEnum::V5},
+ {"hexagonv5", Hexagon::ArchEnum::V5},
+ {"hexagonv55", Hexagon::ArchEnum::V55},
+ {"hexagonv60", Hexagon::ArchEnum::V60},
+ {"hexagonv62", Hexagon::ArchEnum::V62},
+ {"hexagonv65", Hexagon::ArchEnum::V65},
+ {"hexagonv66", Hexagon::ArchEnum::V66},
+ {"hexagonv67", Hexagon::ArchEnum::V67},
+ {"hexagonv67t", Hexagon::ArchEnum::V67},
+ {"hexagonv68", Hexagon::ArchEnum::V68},
+ {"hexagonv69", Hexagon::ArchEnum::V69},
};
static const std::map<std::string, unsigned> ElfFlagsByCpuStr = {
@@ -59,6 +60,7 @@ static const std::map<std::string, unsigned> ElfFlagsByCpuStr = {
{"hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67},
{"hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T},
{"hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68},
+ {"hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69},
};
static const std::map<unsigned, std::string> ElfArchByMachFlags = {
{llvm::ELF::EF_HEXAGON_MACH_V5, "V5"},
@@ -70,6 +72,7 @@ static const std::map<unsigned, std::string> ElfArchByMachFlags = {
{llvm::ELF::EF_HEXAGON_MACH_V67, "V67"},
{llvm::ELF::EF_HEXAGON_MACH_V67T, "V67T"},
{llvm::ELF::EF_HEXAGON_MACH_V68, "V68"},
+ {llvm::ELF::EF_HEXAGON_MACH_V69, "V69"},
};
static const std::map<unsigned, std::string> ElfCpuByMachFlags = {
{llvm::ELF::EF_HEXAGON_MACH_V5, "hexagonv5"},
@@ -81,6 +84,7 @@ static const std::map<unsigned, std::string> ElfCpuByMachFlags = {
{llvm::ELF::EF_HEXAGON_MACH_V67, "hexagonv67"},
{llvm::ELF::EF_HEXAGON_MACH_V67T, "hexagonv67t"},
{llvm::ELF::EF_HEXAGON_MACH_V68, "hexagonv68"},
+ {llvm::ELF::EF_HEXAGON_MACH_V69, "hexagonv69"},
};
} // namespace Hexagon
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td
index e743a291f1e5..e4f24e3c2e66 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -24,3 +24,5 @@ def ArchV67: SubtargetFeature<"v67", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV67 : Predicate<"HST->hasV67Ops()">, AssemblerPredicate<(all_of ArchV67)>;
def ArchV68: SubtargetFeature<"v68", "HexagonArchVersion", "Hexagon::ArchEnum::V68", "Enable Hexagon V68 architecture">;
def HasV68 : Predicate<"HST->hasV68Ops()">, AssemblerPredicate<(all_of ArchV68)>;
+def ArchV69: SubtargetFeature<"v69", "HexagonArchVersion", "Hexagon::ArchEnum::V69", "Enable Hexagon V69 architecture">;
+def HasV69 : Predicate<"HST->hasV69Ops()">, AssemblerPredicate<(all_of ArchV69)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
index 40f6e14aed13..7164af3ad5c6 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
+++ b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
@@ -8,6 +8,7 @@
// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
+
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-function"
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index a1db3ae7239d..d195df918293 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -11,6 +11,7 @@
def tc_04da405a : InstrItinClass;
def tc_05ca8cfd : InstrItinClass;
def tc_08a4f1b6 : InstrItinClass;
+def tc_0afc8be9 : InstrItinClass;
def tc_0b04c6c7 : InstrItinClass;
def tc_0ec46cf9 : InstrItinClass;
def tc_131f1c81 : InstrItinClass;
@@ -21,6 +22,7 @@ def tc_191381c1 : InstrItinClass;
def tc_1ad8a370 : InstrItinClass;
def tc_1ba8a0cd : InstrItinClass;
def tc_20a4bbec : InstrItinClass;
+def tc_2120355e : InstrItinClass;
def tc_257f6f7c : InstrItinClass;
def tc_26a377fe : InstrItinClass;
def tc_2b4c548e : InstrItinClass;
@@ -28,15 +30,18 @@ def tc_2c745bb8 : InstrItinClass;
def tc_2d4051cd : InstrItinClass;
def tc_2e8f5f6e : InstrItinClass;
def tc_309dbb4f : InstrItinClass;
+def tc_37820f4c : InstrItinClass;
def tc_3904b926 : InstrItinClass;
def tc_3aacf4a8 : InstrItinClass;
def tc_3ad719fb : InstrItinClass;
def tc_3c56e5ce : InstrItinClass;
+def tc_3c8c15d0 : InstrItinClass;
def tc_3ce09744 : InstrItinClass;
def tc_3e2aaafc : InstrItinClass;
def tc_447d9895 : InstrItinClass;
def tc_453fe68d : InstrItinClass;
def tc_46d6c3e0 : InstrItinClass;
+def tc_4942646a : InstrItinClass;
def tc_51d0ecc3 : InstrItinClass;
def tc_52447ecc : InstrItinClass;
def tc_540c3da3 : InstrItinClass;
@@ -46,6 +51,7 @@ def tc_56c4f9fe : InstrItinClass;
def tc_56e64202 : InstrItinClass;
def tc_58d21193 : InstrItinClass;
def tc_5bf8afbb : InstrItinClass;
+def tc_5cdf8c84 : InstrItinClass;
def tc_61bf7c03 : InstrItinClass;
def tc_649072c2 : InstrItinClass;
def tc_660769f1 : InstrItinClass;
@@ -57,6 +63,8 @@ def tc_71646d06 : InstrItinClass;
def tc_7177e272 : InstrItinClass;
def tc_718b5c53 : InstrItinClass;
def tc_7273323b : InstrItinClass;
+def tc_72e2b393 : InstrItinClass;
+def tc_73efe966 : InstrItinClass;
def tc_7417e785 : InstrItinClass;
def tc_767c4e9d : InstrItinClass;
def tc_7d68d5c2 : InstrItinClass;
@@ -71,9 +79,11 @@ def tc_9d1dc972 : InstrItinClass;
def tc_9f363d21 : InstrItinClass;
def tc_a02a10a8 : InstrItinClass;
def tc_a0dbea28 : InstrItinClass;
+def tc_a19b9305 : InstrItinClass;
def tc_a28f32b5 : InstrItinClass;
def tc_a69eeee1 : InstrItinClass;
def tc_a7e6707d : InstrItinClass;
+def tc_aa047364 : InstrItinClass;
def tc_ab23f776 : InstrItinClass;
def tc_abe8c3b2 : InstrItinClass;
def tc_ac4046bc : InstrItinClass;
@@ -89,8 +99,10 @@ def tc_c4edf264 : InstrItinClass;
def tc_c5dba46e : InstrItinClass;
def tc_c7039829 : InstrItinClass;
def tc_cd94bfe0 : InstrItinClass;
+def tc_cda936da : InstrItinClass;
def tc_d8287c14 : InstrItinClass;
def tc_db5555f3 : InstrItinClass;
+def tc_dcca380f : InstrItinClass;
def tc_dd5b0695 : InstrItinClass;
def tc_df80eeb0 : InstrItinClass;
def tc_e2d2e9e5 : InstrItinClass;
@@ -99,6 +111,7 @@ def tc_e3f68a46 : InstrItinClass;
def tc_e675c45a : InstrItinClass;
def tc_e699ae41 : InstrItinClass;
def tc_e99d4c2e : InstrItinClass;
+def tc_f175e046 : InstrItinClass;
def tc_f1de44ef : InstrItinClass;
def tc_f21e8abb : InstrItinClass;
@@ -119,6 +132,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -174,6 +192,10 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_ST]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
@@ -209,6 +231,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_37820f4c, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
@@ -231,6 +258,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2],
@@ -259,6 +291,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5],
@@ -306,6 +343,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
@@ -363,6 +405,16 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_72e2b393, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
@@ -437,6 +489,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_ZW]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_a28f32b5, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
@@ -456,6 +513,10 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2, 5],
@@ -537,6 +598,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
@@ -547,6 +613,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_dcca380f, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_ZW]>], [2, 1, 2],
@@ -589,6 +660,11 @@ class DepHVXItinV55 {
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -620,6 +696,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -675,6 +756,10 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_ST]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
@@ -710,6 +795,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_37820f4c, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
@@ -732,6 +822,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2],
@@ -760,6 +855,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5],
@@ -807,6 +907,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
@@ -864,6 +969,16 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_72e2b393, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
@@ -938,6 +1053,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_ZW]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_a28f32b5, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
@@ -957,6 +1077,10 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2, 5],
@@ -1038,6 +1162,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
@@ -1048,6 +1177,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_dcca380f, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_ZW]>], [2, 1, 2],
@@ -1090,6 +1224,11 @@ class DepHVXItinV60 {
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -1121,6 +1260,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -1176,6 +1320,10 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_ST]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
@@ -1211,6 +1359,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_37820f4c, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
@@ -1233,6 +1386,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2],
@@ -1261,6 +1419,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5],
@@ -1308,6 +1471,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
@@ -1365,6 +1533,16 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_72e2b393, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
@@ -1439,6 +1617,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_ZW]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_a28f32b5, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
@@ -1458,6 +1641,10 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2, 5],
@@ -1539,6 +1726,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
@@ -1549,6 +1741,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_dcca380f, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_ZW]>], [2, 1, 2],
@@ -1591,6 +1788,11 @@ class DepHVXItinV62 {
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -1622,6 +1824,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -1677,6 +1884,10 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_ST]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
@@ -1712,6 +1923,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_37820f4c, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
@@ -1734,6 +1950,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2],
@@ -1762,6 +1983,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5],
@@ -1809,6 +2035,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
@@ -1866,6 +2097,16 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_72e2b393, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
@@ -1940,6 +2181,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_ZW]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_a28f32b5, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
@@ -1959,6 +2205,10 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2, 5],
@@ -2040,6 +2290,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
@@ -2050,6 +2305,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_dcca380f, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_ZW]>], [2, 1, 2],
@@ -2092,6 +2352,11 @@ class DepHVXItinV65 {
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -2123,6 +2388,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -2178,6 +2448,10 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_ST]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
@@ -2213,6 +2487,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_37820f4c, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
@@ -2235,6 +2514,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2],
@@ -2263,6 +2547,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5],
@@ -2310,6 +2599,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
@@ -2367,6 +2661,16 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_72e2b393, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
@@ -2441,6 +2745,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_ZW]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_a28f32b5, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
@@ -2460,6 +2769,10 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2, 5],
@@ -2541,6 +2854,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
@@ -2551,6 +2869,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_dcca380f, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_ZW]>], [2, 1, 2],
@@ -2593,6 +2916,11 @@ class DepHVXItinV66 {
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -2624,6 +2952,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -2679,6 +3012,10 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_ST]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
@@ -2714,6 +3051,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_37820f4c, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
@@ -2736,6 +3078,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2],
@@ -2764,6 +3111,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5],
@@ -2811,6 +3163,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
@@ -2868,6 +3225,16 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_72e2b393, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
@@ -2942,6 +3309,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_ZW]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_a28f32b5, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
@@ -2961,6 +3333,10 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2, 5],
@@ -3042,6 +3418,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
@@ -3052,6 +3433,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_dcca380f, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_ZW]>], [2, 1, 2],
@@ -3094,6 +3480,11 @@ class DepHVXItinV67 {
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -3125,6 +3516,575 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_37820f4c, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7095ecba, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_72e2b393, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7d68d5c2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a69eeee1, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb599486, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_dcca380f, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
+
+class DepHVXItinV69 {
+ list<InstrItinData> DepHVXItinV69_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
@@ -3180,6 +4140,10 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_ST]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2120355e, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
@@ -3215,6 +4179,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_37820f4c, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
@@ -3237,6 +4206,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2],
@@ -3265,6 +4239,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5],
@@ -3312,6 +4291,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
@@ -3369,6 +4353,16 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_72e2b393, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
@@ -3443,6 +4437,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_ZW]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
@@ -3462,6 +4461,10 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_aa047364, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [1, 2, 5],
@@ -3543,6 +4546,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
@@ -3553,6 +4561,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_dcca380f, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_ZW]>], [2, 1, 2],
@@ -3595,6 +4608,11 @@ class DepHVXItinV68 {
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index a3766652794b..a979bafe8e33 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -7338,3 +7338,771 @@ class DepScalarItinV68 {
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
];
}
+
+class DepScalarItinV69 {
+ list<InstrItinData> DepScalarItinV69_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_2471c1c8, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4a55d03c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4bf903b0, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7af3a37e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b5bd4f5, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index b3f1b6638193..65d36924ba48 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -2288,6 +2288,12 @@ class Enc_a30110 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
+class Enc_a33d04 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
class Enc_a42857 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3109,6 +3115,14 @@ class Enc_de0214 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
+class Enc_de5ea0 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
class Enc_e07374 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index 4f00409c336c..c02988266584 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -5824,8 +5824,8 @@ let Inst{31-21} = 0b01010100100;
let hasNewValue = 1;
let opNewValue = 0;
let isSolo = 1;
-let Uses = [GOSP];
-let Defs = [GOSP, PC];
+let Uses = [CCR, GOSP];
+let Defs = [CCR, GOSP, PC];
let hasSideEffects = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -8500,6 +8500,8 @@ let Inst{31-21} = 0b01010010101;
let isTerminator = 1;
let isIndirectBranch = 1;
let isBranch = 1;
+let cofRelax1 = 1;
+let cofRelax2 = 1;
let cofMax1 = 1;
}
def J4_jumpseti : HInst<
@@ -18210,16 +18212,6 @@ let opExtentBits = 18;
let opExtentAlign = 2;
let opNewValue = 1;
}
-def PS_trap1 : HInst<
-(outs),
-(ins u8_0Imm:$Ii),
-"trap1(#$Ii)",
-tc_53c851ab, TypeJ>, Enc_a51a9a, Requires<[HasPreV65]> {
-let Inst{1-0} = 0b00;
-let Inst{7-5} = 0b000;
-let Inst{13-13} = 0b0;
-let Inst{31-16} = 0b0101010010000000;
-}
def R6_release_at_vi : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -18964,7 +18956,7 @@ def S2_cabacdecbin : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = decbin($Rss32,$Rtt32)",
-tc_db596beb, TypeS_3op>, Enc_a56825 {
+tc_db596beb, TypeS_3op>, Enc_a56825, Requires<[UseCabac]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -26883,17 +26875,6 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_ldntnt0 : HInst<
-(outs HvxVR:$Vd32),
-(ins IntRegs:$Rt32),
-"$Vd32 = vmem($Rt32):nt",
-PSEUDO, TypeMAPPING>, Requires<[HasV62]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-let DecoderNamespace = "EXT_mmvec";
-}
def V6_ldp0 : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32),
@@ -27312,6 +27293,30 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_v10mpyubs10 : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxWR:$Vuu32, HvxWR:$Vvv32, u1_0Imm:$Ii),
+"$Vdd32.w = v10mpy($Vuu32.ub,$Vvv32.b,#$Ii)",
+tc_f175e046, TypeCVI_VX>, Requires<[UseHVXV69]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_v10mpyubs10_vxx : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, HvxWR:$Vvv32, u1_0Imm:$Ii),
+"$Vxx32.w += v10mpy($Vuu32.ub,$Vvv32.b,#$Ii)",
+tc_4942646a, TypeCVI_VX>, Requires<[UseHVXV69]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isCVI = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
def V6_v6mpyhubs10 : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32, u2_0Imm:$Ii),
@@ -27396,7 +27401,7 @@ def V6_vL32Ub_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmemu($Rt32+#$Ii)",
-tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> {
+tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]>, PostInc_BaseImm {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -27408,13 +27413,15 @@ let isCVLoad = 1;
let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
+let BaseOpcode = "V6_vL32Ub_ai";
+let CextOpcode = "V6_vL32Ub";
let DecoderNamespace = "EXT_mmvec";
}
def V6_vL32Ub_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmemu($Rx32++#$Ii)",
-tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> {
+tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]>, PostInc_BaseImm {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27427,6 +27434,7 @@ let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_pi";
+let CextOpcode = "V6_vL32Ub";
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
}
@@ -27452,7 +27460,7 @@ def V6_vL32b_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii)",
-tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -27465,6 +27473,7 @@ let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ai";
+let CextOpcode = "V6_vL32b";
let isCVLoadable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -27473,7 +27482,7 @@ def V6_vL32b_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii)",
-tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -27487,6 +27496,7 @@ let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ai";
+let CextOpcode = "V6_vL32b_cur";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -27560,7 +27570,7 @@ def V6_vL32b_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii)",
-tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27574,6 +27584,7 @@ let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_pi";
+let CextOpcode = "V6_vL32b_cur";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -27729,7 +27740,7 @@ def V6_vL32b_nt_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii):nt",
-tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27743,6 +27754,7 @@ let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_nt_ai";
+let CextOpcode = "V6_vL32b_nt";
let isCVLoadable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -27751,7 +27763,7 @@ def V6_vL32b_nt_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27766,6 +27778,7 @@ let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_nt_cur_ai";
+let CextOpcode = "V6_vL32b_nt_cur";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -27842,7 +27855,7 @@ def V6_vL32b_nt_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27857,6 +27870,7 @@ let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_nt_cur_pi";
+let CextOpcode = "V6_vL32b_nt_cur";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28019,7 +28033,7 @@ def V6_vL32b_nt_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii):nt",
-tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -28033,6 +28047,7 @@ let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_nt_pi";
+let CextOpcode = "V6_vL32b_nt";
let isCVLoadable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28127,7 +28142,7 @@ def V6_vL32b_nt_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -28137,11 +28152,12 @@ let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_nt_tmp_ai";
+let CextOpcode = "V6_vL32b_nt_tmp";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -28160,7 +28176,7 @@ let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -28183,7 +28199,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -28206,7 +28222,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -28218,7 +28234,7 @@ def V6_vL32b_nt_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -28228,11 +28244,12 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_nt_tmp_pi";
+let CextOpcode = "V6_vL32b_nt_tmp";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28250,7 +28267,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -28273,7 +28290,7 @@ let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -28295,7 +28312,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -28317,7 +28334,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -28329,7 +28346,7 @@ def V6_vL32b_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii)",
-tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -28342,6 +28359,7 @@ let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_pi";
+let CextOpcode = "V6_vL32b";
let isCVLoadable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28432,7 +28450,7 @@ def V6_vL32b_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -28442,10 +28460,11 @@ let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ai";
+let CextOpcode = "V6_vL32b_tmp";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -28464,7 +28483,7 @@ let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ai";
@@ -28486,7 +28505,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_pi";
@@ -28508,7 +28527,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ppu";
@@ -28519,7 +28538,7 @@ def V6_vL32b_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -28529,10 +28548,11 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_pi";
+let CextOpcode = "V6_vL32b_tmp";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28550,7 +28570,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ppu";
@@ -28572,7 +28592,7 @@ let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ai";
@@ -28593,7 +28613,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_pi";
@@ -28614,7 +28634,7 @@ let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
+let hasHvxTmp = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ppu";
@@ -28625,7 +28645,7 @@ def V6_vS32Ub_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rt32+#$Ii) = $Vs32",
-tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28634,6 +28654,7 @@ let accessSize = HVXVectorAccess;
let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_ai";
+let CextOpcode = "V6_vS32Ub";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -28692,7 +28713,7 @@ def V6_vS32Ub_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rx32++#$Ii) = $Vs32",
-tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28701,6 +28722,7 @@ let accessSize = HVXVectorAccess;
let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_pi";
+let CextOpcode = "V6_vS32Ub";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28773,7 +28795,7 @@ def V6_vS32b_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii) = $Vs32",
-tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28782,6 +28804,7 @@ let accessSize = HVXVectorAccess;
let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
+let CextOpcode = "V6_vS32b";
let isNVStorable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28790,7 +28813,7 @@ def V6_vS32b_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii) = $Os8.new",
-tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28802,6 +28825,7 @@ let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
+let CextOpcode = "V6_vS32b_new";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let opNewValue = 2;
@@ -28873,7 +28897,7 @@ def V6_vS32b_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii) = $Os8.new",
-tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28885,6 +28909,7 @@ let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
+let CextOpcode = "V6_vS32b_new";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let opNewValue = 3;
@@ -29070,7 +29095,7 @@ def V6_vS32b_nt_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii):nt = $Vs32",
-tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -29080,6 +29105,7 @@ let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
+let CextOpcode = "V6_vS32b_nt";
let isNVStorable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29088,7 +29114,7 @@ def V6_vS32b_nt_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -29101,6 +29127,7 @@ let isNewValue = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
+let CextOpcode = "V6_vS32b_nt_new";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let opNewValue = 2;
@@ -29175,7 +29202,7 @@ def V6_vS32b_nt_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -29188,6 +29215,7 @@ let isNewValue = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
+let CextOpcode = "V6_vS32b_nt_new";
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
let opNewValue = 3;
@@ -29383,7 +29411,7 @@ def V6_vS32b_nt_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii):nt = $Vs32",
-tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -29393,6 +29421,7 @@ let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
+let CextOpcode = "V6_vS32b_nt";
let isNVStorable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29519,7 +29548,7 @@ def V6_vS32b_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii) = $Vs32",
-tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -29528,6 +29557,7 @@ let accessSize = HVXVectorAccess;
let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
+let CextOpcode = "V6_vS32b";
let isNVStorable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29689,6 +29719,32 @@ let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
}
+def V6_vabs_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.hf = vabs($Vu32.hf)",
+tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabs_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.sf = vabs($Vu32.sf)",
+tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vabsb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
@@ -29975,6 +30031,123 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vadd_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vadd($Vu32.hf,$Vv32.hf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_hf_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vadd($Vu32.hf,$Vv32.hf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vadd($Vu32.qf16,$Vv32.qf16)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_qf16_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vadd($Vu32.qf16,$Vv32.hf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vadd($Vu32.qf32,$Vv32.qf32)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_qf32_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vadd($Vu32.qf32,$Vv32.sf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vadd($Vu32.sf,$Vv32.sf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_sf_hf : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vdd32.sf = vadd($Vu32.hf,$Vv32.hf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadd_sf_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vadd($Vu32.sf,$Vv32.sf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vaddb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -31440,6 +31613,58 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vasrvuhubrndsat : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxWR:$Vuu32, HvxVR:$Vv32),
+"$Vd32.ub = vasr($Vuu32.uh,$Vv32.ub):rnd:sat",
+tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrvuhubsat : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxWR:$Vuu32, HvxVR:$Vv32),
+"$Vd32.ub = vasr($Vuu32.uh,$Vv32.ub):sat",
+tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrvwuhrndsat : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxWR:$Vuu32, HvxVR:$Vv32),
+"$Vd32.uh = vasr($Vuu32.w,$Vv32.uh):rnd:sat",
+tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrvwuhsat : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxWR:$Vuu32, HvxVR:$Vv32),
+"$Vd32.uh = vasr($Vuu32.w,$Vv32.uh):sat",
+tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vasrw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
@@ -31597,6 +31822,33 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vassign_fp : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.w = vfmv($Vu32.w)",
+tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vassign_tmp : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.tmp = $Vu32",
+tc_2120355e, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV69]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let hasHvxTmp = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vassignp : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32),
@@ -32000,6 +32252,189 @@ let isCVI = 1;
let isRegSequence = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vcombine_tmp : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vdd32.tmp = vcombine($Vu32,$Vv32)",
+tc_aa047364, TypeCVI_VX>, Enc_71bb9b, Requires<[UseHVXV69]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let hasHvxTmp = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_hf_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.hf = $Vu32.qf16",
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_hf_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxWR:$Vuu32),
+"$Vd32.hf = $Vuu32.qf32",
+tc_51d0ecc3, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vconv_sf_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.sf = $Vu32.qf32",
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_b_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.b = vcvt($Vu32.hf,$Vv32.hf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_h_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.h = vcvt($Vu32.hf)",
+tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_hf_b : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32),
+"$Vdd32.hf = vcvt($Vu32.b)",
+tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_hf_h : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.hf = vcvt($Vu32.h)",
+tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_hf_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vcvt($Vu32.sf,$Vv32.sf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_hf_ub : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32),
+"$Vdd32.hf = vcvt($Vu32.ub)",
+tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_hf_uh : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.hf = vcvt($Vu32.uh)",
+tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_sf_hf : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32),
+"$Vdd32.sf = vcvt($Vu32.hf)",
+tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_ub_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.ub = vcvt($Vu32.hf,$Vv32.hf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcvt_uh_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.uh = vcvt($Vu32.hf)",
+tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vd0 : HInst<
(outs HvxVR:$Vd32),
(ins),
@@ -32141,6 +32576,34 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vdmpy_sf_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vdmpy($Vu32.hf,$Vv32.hf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpy_sf_hf_acc : HInst<
+(outs HvxVR:$Vx32),
+(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vx32.sf += vdmpy($Vu32.hf,$Vv32.hf)",
+tc_a19b9305, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
def V6_vdmpybus : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
@@ -32415,7 +32878,7 @@ def V6_vdmpyhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
-tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32428,7 +32891,7 @@ def V6_vdmpyhsat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
-tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_72e2b393, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32523,7 +32986,7 @@ def V6_vdmpyhsusat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32536,7 +32999,7 @@ def V6_vdmpyhsusat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_72e2b393, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32577,7 +33040,7 @@ def V6_vdmpyhvsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
-tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_73efe966, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -32831,6 +33294,84 @@ let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vfmax_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vfmax($Vu32.hf,$Vv32.hf)",
+tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vfmax_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vfmax($Vu32.sf,$Vv32.sf)",
+tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vfmin_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vfmin($Vu32.hf,$Vv32.hf)",
+tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vfmin_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vfmin($Vu32.sf,$Vv32.sf)",
+tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vfneg_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.hf = vfneg($Vu32.hf)",
+tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vfneg_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32),
+"$Vd32.sf = vfneg($Vu32.sf)",
+tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vgathermh : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
@@ -32843,7 +33384,6 @@ let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
let DecoderNamespace = "EXT_mmvec";
@@ -32860,7 +33400,6 @@ let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
let DecoderNamespace = "EXT_mmvec";
@@ -32877,7 +33416,6 @@ let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
let DecoderNamespace = "EXT_mmvec";
@@ -32894,7 +33432,6 @@ let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
let DecoderNamespace = "EXT_mmvec";
@@ -32911,7 +33448,6 @@ let opNewValue = 0;
let accessSize = WordAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
let DecoderNamespace = "EXT_mmvec";
@@ -32928,7 +33464,6 @@ let opNewValue = 0;
let accessSize = WordAccess;
let isCVLoad = 1;
let isCVI = 1;
-let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
let DecoderNamespace = "EXT_mmvec";
@@ -33033,6 +33568,106 @@ let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vgthf : HInst<
+(outs HvxQR:$Qd4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.hf,$Vv32.hf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b011101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgthf_and : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b110011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgthf_or : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b001101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isAccumulator = 1;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgthf_xor : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.hf,$Vv32.hf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b111011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtsf : HInst<
+(outs HvxQR:$Qd4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.sf,$Vv32.sf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b011100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtsf_and : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b110010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtsf_or : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b001100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isAccumulator = 1;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtsf_xor : HInst<
+(outs HvxQR:$Qx4),
+(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.sf,$Vv32.sf)",
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> {
+let Inst{7-2} = 0b111010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
def V6_vgtub : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -33552,6 +34187,32 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vmax_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vmax($Vu32.hf,$Vv32.hf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmax_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vmax($Vu32.sf,$Vv32.sf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vmaxb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -33677,6 +34338,32 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vmin_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vmin($Vu32.hf,$Vv32.hf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmin_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vmin($Vu32.sf,$Vv32.sf)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vminb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -34110,6 +34797,179 @@ let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
+def V6_vmpy_hf_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vmpy($Vu32.hf,$Vv32.hf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_hf_hf_acc : HInst<
+(outs HvxVR:$Vx32),
+(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vx32.hf += vmpy($Vu32.hf,$Vv32.hf)",
+tc_a19b9305, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpy_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vmpy($Vu32.qf16,$Vv32.qf16)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_qf16_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vmpy($Vu32.hf,$Vv32.hf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_qf16_mix_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vmpy($Vu32.qf16,$Vv32.hf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vmpy($Vu32.qf32,$Vv32.qf32)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_qf32_hf : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vdd32.qf32 = vmpy($Vu32.hf,$Vv32.hf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_qf32_mix_hf : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vdd32.qf32 = vmpy($Vu32.qf16,$Vv32.hf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_qf32_qf16 : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vdd32.qf32 = vmpy($Vu32.qf16,$Vv32.qf16)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_qf32_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vmpy($Vu32.sf,$Vv32.sf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_sf_hf : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vdd32.sf = vmpy($Vu32.hf,$Vv32.hf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpy_sf_hf_acc : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vxx32.sf += vmpy($Vu32.hf,$Vv32.hf)",
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpy_sf_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vmpy($Vu32.sf,$Vv32.sf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vmpybus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
@@ -34397,7 +35257,7 @@ def V6_vmpyhsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
-tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34422,7 +35282,7 @@ def V6_vmpyhss : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
-tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34555,7 +35415,7 @@ def V6_vmpyhvsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
-tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_73efe966, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -35332,6 +36192,19 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vmpyuhvs : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.uh = vmpy($Vu32.uh,$Vv32.uh):>>16",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV69]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vmux : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32),
@@ -36007,7 +36880,7 @@ def V6_vrmpybusv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
-tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -36061,7 +36934,7 @@ def V6_vrmpybv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
-tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -36277,7 +37150,7 @@ def V6_vrmpyubv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
-tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -37412,6 +38285,123 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsub_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.hf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_hf_hf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.hf = vsub($Vu32.hf,$Vv32.hf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_qf16 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vsub($Vu32.qf16,$Vv32.qf16)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_qf16_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vsub($Vu32.qf16,$Vv32.hf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_qf32 : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vsub($Vu32.qf32,$Vv32.qf32)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_qf32_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vsub($Vu32.qf32,$Vv32.sf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.sf)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_sf_hf : HInst<
+(outs HvxWR:$Vdd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vdd32.sf = vsub($Vu32.hf,$Vv32.hf)",
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsub_sf_sf : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.sf = vsub($Vu32.sf,$Vv32.sf)",
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsubb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -38647,7 +39637,7 @@ def V6_zLd_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"z = vmem($Rt32+#$Ii)",
-tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]> {
+tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]>, PostInc_BaseImm {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101100000;
@@ -38655,13 +39645,14 @@ let addrMode = BaseImmOffset;
let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
+let CextOpcode = "V6_zLd";
let DecoderNamespace = "EXT_mmvec";
}
def V6_zLd_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"z = vmem($Rx32++#$Ii)",
-tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]> {
+tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]>, PostInc_BaseImm {
let Inst{7-0} = 0b00000000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101101000;
@@ -38669,6 +39660,7 @@ let addrMode = PostInc;
let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
+let CextOpcode = "V6_zLd";
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
}
@@ -38782,6 +39774,17 @@ let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0110110000100000;
let isSolo = 1;
}
+def Y2_crswap_old : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in),
+"crswap($Rx32,sgp)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
def Y2_dccleana : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -38861,6 +39864,22 @@ let Inst{13-0} = 0b00000000000010;
let Inst{31-16} = 0b0101011111000000;
let isSolo = 1;
}
+def Y2_k1lock_map : HInst<
+(outs),
+(ins),
+"k1lock",
+PSEUDO, TypeMAPPING>, Requires<[HasV65]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def Y2_k1unlock_map : HInst<
+(outs),
+(ins),
+"k1unlock",
+PSEUDO, TypeMAPPING>, Requires<[HasV65]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
def Y2_syncht : HInst<
(outs),
(ins),
@@ -39083,7 +40102,7 @@ def dup_A2_add : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32)",
-tc_388f9897, TypeALU32_3op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_3op>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39093,7 +40112,7 @@ def dup_A2_addi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,#$Ii)",
-tc_388f9897, TypeALU32_ADDI>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_ADDI>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39108,7 +40127,7 @@ def dup_A2_andir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = and($Rs32,#$Ii)",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39123,7 +40142,7 @@ def dup_A2_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, s8_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let AsmVariantName = "NonParsable";
let isPseudo = 1;
let isExtendable = 1;
@@ -39136,7 +40155,7 @@ def dup_A2_sxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxtb($Rs32)",
-tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> {
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39146,7 +40165,7 @@ def dup_A2_sxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxth($Rs32)",
-tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> {
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39156,7 +40175,7 @@ def dup_A2_tfr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = $Rs32",
-tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> {
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39166,7 +40185,7 @@ def dup_A2_tfrsi : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii),
"$Rd32 = #$Ii",
-tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> {
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39181,7 +40200,7 @@ def dup_A2_zxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxtb($Rs32)",
-PSEUDO, TypeMAPPING>, Requires<[HasV68]> {
+PSEUDO, TypeMAPPING>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39191,7 +40210,7 @@ def dup_A2_zxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxth($Rs32)",
-tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> {
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let AsmVariantName = "NonParsable";
@@ -39201,7 +40220,7 @@ def dup_A4_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let AsmVariantName = "NonParsable";
let isPseudo = 1;
let isExtendable = 1;
@@ -39214,7 +40233,7 @@ def dup_A4_combineir : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rdd32 = combine(#$Ii,$Rs32)",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let AsmVariantName = "NonParsable";
let isPseudo = 1;
let isExtendable = 1;
@@ -39227,7 +40246,7 @@ def dup_A4_combineri : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rdd32 = combine($Rs32,#$Ii)",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let AsmVariantName = "NonParsable";
let isPseudo = 1;
let isExtendable = 1;
@@ -39240,7 +40259,7 @@ def dup_C2_cmoveif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = #$Ii",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -39257,7 +40276,7 @@ def dup_C2_cmoveit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = #$Ii",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -39273,7 +40292,7 @@ def dup_C2_cmovenewif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = #$Ii",
-tc_4ac61d92, TypeALU32_2op>, Requires<[HasV68]> {
+tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -39291,7 +40310,7 @@ def dup_C2_cmovenewit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = #$Ii",
-tc_4ac61d92, TypeALU32_2op>, Requires<[HasV68]> {
+tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -39308,7 +40327,7 @@ def dup_C2_cmpeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.eq($Rs32,#$Ii)",
-tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> {
+tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> {
let AsmVariantName = "NonParsable";
let isPseudo = 1;
let isExtendable = 1;
@@ -39321,7 +40340,7 @@ def dup_L2_deallocframe : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = deallocframe($Rs32):raw",
-tc_aee6250c, TypeLD>, Requires<[HasV68]> {
+tc_aee6250c, TypeLD>, Requires<[HasV69]> {
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
let mayLoad = 1;
@@ -39333,7 +40352,7 @@ def dup_L2_loadrb_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memb($Rs32+#$Ii)",
-tc_eed07714, TypeLD>, Requires<[HasV68]> {
+tc_eed07714, TypeLD>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let addrMode = BaseImmOffset;
@@ -39351,7 +40370,7 @@ def dup_L2_loadrd_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s29_3Imm:$Ii),
"$Rdd32 = memd($Rs32+#$Ii)",
-tc_eed07714, TypeLD>, Requires<[HasV68]> {
+tc_eed07714, TypeLD>, Requires<[HasV69]> {
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
@@ -39367,7 +40386,7 @@ def dup_L2_loadrh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memh($Rs32+#$Ii)",
-tc_eed07714, TypeLD>, Requires<[HasV68]> {
+tc_eed07714, TypeLD>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let addrMode = BaseImmOffset;
@@ -39385,7 +40404,7 @@ def dup_L2_loadri_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rd32 = memw($Rs32+#$Ii)",
-tc_eed07714, TypeLD>, Requires<[HasV68]> {
+tc_eed07714, TypeLD>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let addrMode = BaseImmOffset;
@@ -39403,7 +40422,7 @@ def dup_L2_loadrub_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memub($Rs32+#$Ii)",
-tc_eed07714, TypeLD>, Requires<[HasV68]> {
+tc_eed07714, TypeLD>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let addrMode = BaseImmOffset;
@@ -39421,7 +40440,7 @@ def dup_L2_loadruh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memuh($Rs32+#$Ii)",
-tc_eed07714, TypeLD>, Requires<[HasV68]> {
+tc_eed07714, TypeLD>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let addrMode = BaseImmOffset;
@@ -39439,7 +40458,7 @@ def dup_S2_allocframe : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u11_3Imm:$Ii),
"allocframe($Rx32,#$Ii):raw",
-tc_74a42bda, TypeST>, Requires<[HasV68]> {
+tc_74a42bda, TypeST>, Requires<[HasV69]> {
let hasNewValue = 1;
let opNewValue = 0;
let addrMode = BaseImmOffset;
@@ -39455,7 +40474,7 @@ def dup_S2_storerb_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) = $Rt32",
-tc_a9edeffa, TypeST>, Requires<[HasV68]> {
+tc_a9edeffa, TypeST>, Requires<[HasV69]> {
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let AsmVariantName = "NonParsable";
@@ -39471,7 +40490,7 @@ def dup_S2_storerd_io : HInst<
(outs),
(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+#$Ii) = $Rtt32",
-tc_a9edeffa, TypeST>, Requires<[HasV68]> {
+tc_a9edeffa, TypeST>, Requires<[HasV69]> {
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
@@ -39487,7 +40506,7 @@ def dup_S2_storerh_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32",
-tc_a9edeffa, TypeST>, Requires<[HasV68]> {
+tc_a9edeffa, TypeST>, Requires<[HasV69]> {
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let AsmVariantName = "NonParsable";
@@ -39503,7 +40522,7 @@ def dup_S2_storeri_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) = $Rt32",
-tc_a9edeffa, TypeST>, Requires<[HasV68]> {
+tc_a9edeffa, TypeST>, Requires<[HasV69]> {
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let AsmVariantName = "NonParsable";
@@ -39519,7 +40538,7 @@ def dup_S4_storeirb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"memb($Rs32+#$Ii) = #$II",
-tc_838c4d7a, TypeV4LDST>, Requires<[HasV68]> {
+tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> {
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let AsmVariantName = "NonParsable";
@@ -39535,7 +40554,7 @@ def dup_S4_storeiri_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"memw($Rs32+#$Ii) = #$II",
-tc_838c4d7a, TypeV4LDST>, Requires<[HasV68]> {
+tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> {
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let AsmVariantName = "NonParsable";
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index e5c78d122c9e..64bc5091d1d1 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -1661,8 +1661,6 @@ def: Pat<(int_hexagon_Y2_dccleana IntRegs:$src1),
(Y2_dccleana IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_Y2_dccleaninva IntRegs:$src1),
(Y2_dccleaninva IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_Y2_dcfetch IntRegs:$src1),
- (Y2_dcfetch IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_Y2_dcinva IntRegs:$src1),
(Y2_dcinva IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_Y2_dczeroa IntRegs:$src1),
@@ -3380,3 +3378,294 @@ def: Pat<(int_hexagon_V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3,
(V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV68, UseHVX64B]>;
def: Pat<(int_hexagon_V6_v6mpyvubs10_vxx_128B HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4),
(V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabs_hf HvxVR:$src1),
+ (V6_vabs_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabs_hf_128B HvxVR:$src1),
+ (V6_vabs_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabs_sf HvxVR:$src1),
+ (V6_vabs_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabs_sf_128B HvxVR:$src1),
+ (V6_vabs_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadd_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadd_hf_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_qf16_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_qf16_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_qf32_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_qf32_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadd_sf_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadd_sf_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassign_fp HvxVR:$src1),
+ (V6_vassign_fp HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassign_fp_128B HvxVR:$src1),
+ (V6_vassign_fp HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vconv_hf_qf16 HvxVR:$src1),
+ (V6_vconv_hf_qf16 HvxVR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_hf_qf16_128B HvxVR:$src1),
+ (V6_vconv_hf_qf16 HvxVR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_hf_qf32 HvxWR:$src1),
+ (V6_vconv_hf_qf32 HvxWR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_hf_qf32_128B HvxWR:$src1),
+ (V6_vconv_hf_qf32 HvxWR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_sf_qf32 HvxVR:$src1),
+ (V6_vconv_sf_qf32 HvxVR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vconv_sf_qf32_128B HvxVR:$src1),
+ (V6_vconv_sf_qf32 HvxVR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_b_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_h_hf HvxVR:$src1),
+ (V6_vcvt_h_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_h_hf_128B HvxVR:$src1),
+ (V6_vcvt_h_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_b HvxVR:$src1),
+ (V6_vcvt_hf_b HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_b_128B HvxVR:$src1),
+ (V6_vcvt_hf_b HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_h HvxVR:$src1),
+ (V6_vcvt_hf_h HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_h_128B HvxVR:$src1),
+ (V6_vcvt_hf_h HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_ub HvxVR:$src1),
+ (V6_vcvt_hf_ub HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_ub_128B HvxVR:$src1),
+ (V6_vcvt_hf_ub HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_uh HvxVR:$src1),
+ (V6_vcvt_hf_uh HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_hf_uh_128B HvxVR:$src1),
+ (V6_vcvt_hf_uh HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_sf_hf HvxVR:$src1),
+ (V6_vcvt_sf_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_sf_hf_128B HvxVR:$src1),
+ (V6_vcvt_sf_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_ub_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcvt_uh_hf HvxVR:$src1),
+ (V6_vcvt_uh_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcvt_uh_hf_128B HvxVR:$src1),
+ (V6_vcvt_uh_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpy_sf_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpy_sf_hf_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vfmax_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vfmax_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vfmax_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vfmax_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vfmin_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vfmin_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vfmin_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vfmin_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vfmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vfneg_hf HvxVR:$src1),
+ (V6_vfneg_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vfneg_hf_128B HvxVR:$src1),
+ (V6_vfneg_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vfneg_sf HvxVR:$src1),
+ (V6_vfneg_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vfneg_sf_128B HvxVR:$src1),
+ (V6_vfneg_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgthf HvxVR:$src1, HvxVR:$src2),
+ (V6_vgthf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgthf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgthf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgthf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgthf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgthf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtsf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtsf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vgtsf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmax_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmax_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmax_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmax_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmin_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmin_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmin_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmin_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpy_hf_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpy_hf_hf_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf16_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf16_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf16_mix_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_mix_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_qf16_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_qf32_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpy_sf_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpy_sf_hf_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpy_sf_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsub_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsub_hf_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_qf16_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_qf16_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_qf32_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_qf32_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsub_sf_hf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsub_sf_sf_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>;
+
+// V69 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrvuhubrndsat_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrvuhubsat_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrvwuhrndsat_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrvwuhsat_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhvs_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMappings.td b/llvm/lib/Target/Hexagon/HexagonDepMappings.td
index 919cb996ad15..2f7b76b893a9 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMappings.td
@@ -174,7 +174,6 @@ def V6_ldcpnt0Alias : InstAlias<"if ($Pv4) $Vd32.cur = vmem($Rt32):nt", (V6_vL32
def V6_ldnp0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>;
def V6_ldnpnt0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>;
def V6_ldnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
-def V6_ldntnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>;
def V6_ldp0Alias : InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
def V6_ldpnt0Alias : InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
def V6_ldtnp0Alias : InstAlias<"if (!$Pv4) $Vd32.tmp = vmem($Rt32)", (V6_vL32b_npred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 46c1fbc6eeb2..85230cac9d7c 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -1445,8 +1445,8 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
MachineBasicBlock *B = N->getBlock();
std::vector<MachineInstr*> Instrs;
- for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
- Instrs.push_back(&*I);
+ for (MachineInstr &MI : llvm::reverse(*B))
+ Instrs.push_back(&MI);
for (MachineInstr *MI : Instrs) {
unsigned Opc = MI->getOpcode();
diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
index e45126bec6ef..44679d429de5 100644
--- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
@@ -60,7 +60,7 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) {
RetVal = NoHazard;
LLVM_DEBUG(dbgs() << "*** Try .new version? " << (RetVal == NoHazard)
<< "\n");
- MF->DeleteMachineInstr(NewMI);
+ MF->deleteMachineInstr(NewMI);
}
return RetVal;
}
@@ -129,7 +129,7 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
MI->getDebugLoc());
assert(Resources->canReserveResources(*NewMI));
Resources->reserveResources(*NewMI);
- MF->DeleteMachineInstr(NewMI);
+ MF->deleteMachineInstr(NewMI);
}
else
Resources->reserveResources(*MI);
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 45adaf50774f..898ef51bd48f 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -146,9 +146,6 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<1> isFP = 0;
let TSFlags {50} = isFP; // Floating-point.
- bits<1> isSomeOK = 0;
- let TSFlags {51} = isSomeOK; // Relax some grouping constraints.
-
bits<1> hasNewValue2 = 0;
let TSFlags{52} = hasNewValue2; // Second New-value producer insn.
bits<3> opNewValue2 = 0;
@@ -160,8 +157,8 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<1> prefersSlot3 = 0;
let TSFlags{57} = prefersSlot3; // Complex XU
- bits<1> hasTmpDst = 0;
- let TSFlags{60} = hasTmpDst; // v65 : 'fake" register VTMP is set
+ bits<1> hasHvxTmp = 0;
+ let TSFlags{60} = hasHvxTmp; // vector register vX.tmp false-write
bit CVINew = 0;
let TSFlags{62} = CVINew;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index b6984d40f78e..931b0c0e0090 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -4655,3 +4656,11 @@ short HexagonInstrInfo::changeAddrMode_rr_ur(short Opc) const {
short HexagonInstrInfo::changeAddrMode_ur_rr(short Opc) const {
return Opc >= 0 ? Hexagon::changeAddrMode_ur_rr(Opc) : Opc;
}
+
+MCInst HexagonInstrInfo::getNop() const {
+ static const MCInst Nop = MCInstBuilder(Hexagon::A2_nop);
+
+ return MCInstBuilder(Hexagon::BUNDLE)
+ .addImm(0)
+ .addInst(&Nop);
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index eaaf9f7046c7..830f04d9eac3 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -524,6 +524,8 @@ public:
short changeAddrMode_ur_rr(const MachineInstr &MI) const {
return changeAddrMode_ur_rr(MI.getOpcode());
}
+
+ MCInst getNop() const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 987c4a5fa6c4..d5c34ac467c3 100644
--- a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -104,6 +104,19 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
HexagonMCInstrInfo::setOuterLoop(MCB);
return;
}
+ if (MI->getOpcode() == Hexagon::PATCHABLE_FUNCTION_ENTER) {
+ AP.EmitSled(*MI, HexagonAsmPrinter::SledKind::FUNCTION_ENTER);
+ return;
+ }
+ if (MI->getOpcode() == Hexagon::PATCHABLE_FUNCTION_EXIT) {
+ AP.EmitSled(*MI, HexagonAsmPrinter::SledKind::FUNCTION_EXIT);
+ return;
+ }
+ if (MI->getOpcode() == Hexagon::PATCHABLE_TAIL_CALL) {
+ AP.EmitSled(*MI, HexagonAsmPrinter::SledKind::TAIL_CALL);
+ return;
+ }
+
MCInst *MCI = AP.OutContext.createMCInst();
MCI->setOpcode(MI->getOpcode());
assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 60d58f421bbb..53e82ac66b85 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -14,676 +14,44 @@
#include "HexagonMachineScheduler.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <iomanip>
-#include <limits>
-#include <memory>
-#include <sstream>
+#include "llvm/CodeGen/VLIWMachineScheduler.h"
using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
-static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
-
-static cl::opt<bool> UseNewerCandidate("use-newer-candidate",
- cl::Hidden, cl::ZeroOrMore, cl::init(true));
-
-static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
- cl::Hidden, cl::ZeroOrMore, cl::init(1));
-
-// Check if the scheduler should penalize instructions that are available to
-// early due to a zero-latency dependence.
-static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
- cl::ZeroOrMore, cl::init(true));
-
-// This value is used to determine if a register class is a high pressure set.
-// We compute the maximum number of registers needed and divided by the total
-// available. Then, we compare the result to this value.
-static cl::opt<float> RPThreshold("hexagon-reg-pressure", cl::Hidden,
- cl::init(0.75f), cl::desc("High register pressure threhold."));
-
/// Return true if there is a dependence between SUd and SUu.
-static bool hasDependence(const SUnit *SUd, const SUnit *SUu,
- const HexagonInstrInfo &QII) {
- if (SUd->Succs.size() == 0)
- return false;
+bool HexagonVLIWResourceModel::hasDependence(const SUnit *SUd,
+ const SUnit *SUu) {
+ const auto *QII = static_cast<const HexagonInstrInfo *>(TII);
// Enable .cur formation.
- if (QII.mayBeCurLoad(*SUd->getInstr()))
+ if (QII->mayBeCurLoad(*SUd->getInstr()))
return false;
- if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr()))
- return false;
-
- for (const auto &S : SUd->Succs) {
- // Since we do not add pseudos to packets, might as well
- // ignore order dependencies.
- if (S.isCtrl())
- continue;
-
- if (S.getSUnit() == SUu && S.getLatency() > 0)
- return true;
- }
- return false;
-}
-
-/// Check if scheduling of this SU is possible
-/// in the current packet.
-/// It is _not_ precise (statefull), it is more like
-/// another heuristic. Many corner cases are figured
-/// empirically.
-bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
- if (!SU || !SU->getInstr())
+ if (QII->canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr()))
return false;
- // First see if the pipeline could receive this instruction
- // in the current cycle.
- switch (SU->getInstr()->getOpcode()) {
- default:
- if (!ResourcesModel->canReserveResources(*SU->getInstr()))
- return false;
- break;
- case TargetOpcode::EXTRACT_SUBREG:
- case TargetOpcode::INSERT_SUBREG:
- case TargetOpcode::SUBREG_TO_REG:
- case TargetOpcode::REG_SEQUENCE:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::COPY:
- case TargetOpcode::INLINEASM:
- case TargetOpcode::INLINEASM_BR:
- break;
- }
-
- MachineBasicBlock *MBB = SU->getInstr()->getParent();
- auto &QST = MBB->getParent()->getSubtarget<HexagonSubtarget>();
- const auto &QII = *QST.getInstrInfo();
-
- // Now see if there are no other dependencies to instructions already
- // in the packet.
- if (IsTop) {
- for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- if (hasDependence(Packet[i], SU, QII))
- return false;
- } else {
- for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- if (hasDependence(SU, Packet[i], QII))
- return false;
- }
- return true;
-}
-
-/// Keep track of available resources.
-bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
- bool startNewCycle = false;
- // Artificially reset state.
- if (!SU) {
- ResourcesModel->clearResources();
- Packet.clear();
- TotalPackets++;
- return false;
- }
- // If this SU does not fit in the packet or the packet is now full
- // start a new one.
- if (!isResourceAvailable(SU, IsTop) ||
- Packet.size() >= SchedModel->getIssueWidth()) {
- ResourcesModel->clearResources();
- Packet.clear();
- TotalPackets++;
- startNewCycle = true;
- }
-
- switch (SU->getInstr()->getOpcode()) {
- default:
- ResourcesModel->reserveResources(*SU->getInstr());
- break;
- case TargetOpcode::EXTRACT_SUBREG:
- case TargetOpcode::INSERT_SUBREG:
- case TargetOpcode::SUBREG_TO_REG:
- case TargetOpcode::REG_SEQUENCE:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::COPY:
- case TargetOpcode::INLINEASM:
- case TargetOpcode::INLINEASM_BR:
- break;
- }
- Packet.push_back(SU);
-
-#ifndef NDEBUG
- LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
- for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
- LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
- LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
- LLVM_DEBUG(Packet[i]->getInstr()->dump());
- }
-#endif
-
- return startNewCycle;
+ return VLIWResourceModel::hasDependence(SUd, SUu);
}
-/// schedule - Called back from MachineScheduler::runOnMachineFunction
-/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
-/// only includes instructions that have DAG nodes, not scheduling boundaries.
-void VLIWMachineScheduler::schedule() {
- LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
- << printMBBReference(*BB) << " " << BB->getName()
- << " in_func " << BB->getParent()->getName()
- << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
-
- buildDAGWithRegPressure();
-
- Topo.InitDAGTopologicalSorting();
-
- // Postprocess the DAG to add platform-specific artificial dependencies.
- postprocessDAG();
-
- SmallVector<SUnit*, 8> TopRoots, BotRoots;
- findRootsAndBiasEdges(TopRoots, BotRoots);
-
- // Initialize the strategy before modifying the DAG.
- SchedImpl->initialize(this);
-
- LLVM_DEBUG(unsigned maxH = 0;
- for (unsigned su = 0, e = SUnits.size(); su != e;
- ++su) if (SUnits[su].getHeight() > maxH) maxH =
- SUnits[su].getHeight();
- dbgs() << "Max Height " << maxH << "\n";);
- LLVM_DEBUG(unsigned maxD = 0;
- for (unsigned su = 0, e = SUnits.size(); su != e;
- ++su) if (SUnits[su].getDepth() > maxD) maxD =
- SUnits[su].getDepth();
- dbgs() << "Max Depth " << maxD << "\n";);
- LLVM_DEBUG(dump());
-
- initQueues(TopRoots, BotRoots);
-
- bool IsTopNode = false;
- while (true) {
- LLVM_DEBUG(
- dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
- SUnit *SU = SchedImpl->pickNode(IsTopNode);
- if (!SU) break;
-
- if (!checkSchedLimit())
- break;
-
- scheduleMI(SU, IsTopNode);
-
- // Notify the scheduling strategy after updating the DAG.
- SchedImpl->schedNode(SU, IsTopNode);
-
- updateQueues(SU, IsTopNode);
- }
- assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
-
- placeDebugValues();
-
- LLVM_DEBUG({
- dbgs() << "*** Final schedule for "
- << printMBBReference(*begin()->getParent()) << " ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+VLIWResourceModel *HexagonConvergingVLIWScheduler::createVLIWResourceModel(
+ const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
+ return new HexagonVLIWResourceModel(STI, SchedModel);
}
-void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
- DAG = static_cast<VLIWMachineScheduler*>(dag);
- SchedModel = DAG->getSchedModel();
-
- Top.init(DAG, SchedModel);
- Bot.init(DAG, SchedModel);
-
- // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
- // are disabled, then these HazardRecs will be disabled.
- const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
- const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
- const TargetInstrInfo *TII = STI.getInstrInfo();
- delete Top.HazardRec;
- delete Bot.HazardRec;
- Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
- Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
-
- delete Top.ResourceModel;
- delete Bot.ResourceModel;
- Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
- Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
-
- const std::vector<unsigned> &MaxPressure =
- DAG->getRegPressure().MaxSetPressure;
- HighPressureSets.assign(MaxPressure.size(), 0);
- for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
- unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
- HighPressureSets[i] =
- ((float) MaxPressure[i] > ((float) Limit * RPThreshold));
- }
-
- assert((!ForceTopDown || !ForceBottomUp) &&
- "-misched-topdown incompatible with -misched-bottomup");
-}
-
-void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
- for (const SDep &PI : SU->Preds) {
- unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
- unsigned MinLatency = PI.getLatency();
-#ifndef NDEBUG
- Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
-#endif
- if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
- SU->TopReadyCycle = PredReadyCycle + MinLatency;
- }
-
- if (!SU->isScheduled)
- Top.releaseNode(SU, SU->TopReadyCycle);
-}
-
-void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
- assert(SU->getInstr() && "Scheduled SUnit must have instr");
-
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
- unsigned MinLatency = I->getLatency();
-#ifndef NDEBUG
- Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
-#endif
- if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
- SU->BotReadyCycle = SuccReadyCycle + MinLatency;
- }
-
- if (!SU->isScheduled)
- Bot.releaseNode(SU, SU->BotReadyCycle);
-}
-
-/// Does this SU have a hazard within the current instruction group.
-///
-/// The scheduler supports two modes of hazard recognition. The first is the
-/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
-/// supports highly complicated in-order reservation tables
-/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
-///
-/// The second is a streamlined mechanism that checks for hazards based on
-/// simple counters that the scheduler itself maintains. It explicitly checks
-/// for instruction dispatch limitations, including the number of micro-ops that
-/// can dispatch per cycle.
-///
-/// TODO: Also check whether the SU must start a new group.
-bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
- if (HazardRec->isEnabled())
- return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
-
- unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
- if (IssueCount + uops > SchedModel->getIssueWidth())
- return true;
-
- return false;
-}
-
-void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
- unsigned ReadyCycle) {
- if (ReadyCycle < MinReadyCycle)
- MinReadyCycle = ReadyCycle;
-
- // Check for interlocks first. For the purpose of other heuristics, an
- // instruction that cannot issue appears as if it's not in the ReadyQueue.
- if (ReadyCycle > CurrCycle || checkHazard(SU))
-
- Pending.push(SU);
- else
- Available.push(SU);
-}
-
-/// Move the boundary of scheduled code by one cycle.
-void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
- unsigned Width = SchedModel->getIssueWidth();
- IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
-
- assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
- "MinReadyCycle uninitialized");
- unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
-
- if (!HazardRec->isEnabled()) {
- // Bypass HazardRec virtual calls.
- CurrCycle = NextCycle;
- } else {
- // Bypass getHazardType calls in case of long latency.
- for (; CurrCycle != NextCycle; ++CurrCycle) {
- if (isTop())
- HazardRec->AdvanceCycle();
- else
- HazardRec->RecedeCycle();
- }
- }
- CheckPending = true;
-
- LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
- << CurrCycle << '\n');
-}
-
-/// Move the boundary of scheduled code by one SUnit.
-void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
- bool startNewCycle = false;
-
- // Update the reservation table.
- if (HazardRec->isEnabled()) {
- if (!isTop() && SU->isCall) {
- // Calls are scheduled with their preceding instructions. For bottom-up
- // scheduling, clear the pipeline state before emitting.
- HazardRec->Reset();
- }
- HazardRec->EmitInstruction(SU);
- }
-
- // Update DFA model.
- startNewCycle = ResourceModel->reserveResources(SU, isTop());
-
- // Check the instruction group dispatch limit.
- // TODO: Check if this SU must end a dispatch group.
- IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
- if (startNewCycle) {
- LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
- bumpCycle();
- }
- else
- LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
- << CurrCycle << '\n');
-}
-
-/// Release pending ready nodes in to the available queue. This makes them
-/// visible to heuristics.
-void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
- // If the available queue is empty, it is safe to reset MinReadyCycle.
- if (Available.empty())
- MinReadyCycle = std::numeric_limits<unsigned>::max();
-
- // Check to see if any of the pending instructions are ready to issue. If
- // so, add them to the available queue.
- for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
- SUnit *SU = *(Pending.begin()+i);
- unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
-
- if (ReadyCycle < MinReadyCycle)
- MinReadyCycle = ReadyCycle;
-
- if (ReadyCycle > CurrCycle)
- continue;
-
- if (checkHazard(SU))
- continue;
-
- Available.push(SU);
- Pending.remove(Pending.begin()+i);
- --i; --e;
- }
- CheckPending = false;
-}
-
-/// Remove SU from the ready set for this boundary.
-void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
- if (Available.isInQueue(SU))
- Available.remove(Available.find(SU));
- else {
- assert(Pending.isInQueue(SU) && "bad ready count");
- Pending.remove(Pending.find(SU));
- }
-}
+int HexagonConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ int ResCount =
+ ConvergingVLIWScheduler::SchedulingCost(Q, SU, Candidate, Delta, verbose);
-/// If this queue only has one ready candidate, return it. As a side effect,
-/// advance the cycle until at least one node is ready. If multiple instructions
-/// are ready, return NULL.
-SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
- if (CheckPending)
- releasePending();
-
- auto AdvanceCycle = [this]() {
- if (Available.empty())
- return true;
- if (Available.size() == 1 && Pending.size() > 0)
- return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
- getWeakLeft(*Available.begin(), isTop()) != 0;
- return false;
- };
- for (unsigned i = 0; AdvanceCycle(); ++i) {
- assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
- "permanent hazard"); (void)i;
- ResourceModel->reserveResources(nullptr, isTop());
- bumpCycle();
- releasePending();
- }
- if (Available.size() == 1)
- return *Available.begin();
- return nullptr;
-}
-
-#ifndef NDEBUG
-void ConvergingVLIWScheduler::traceCandidate(const char *Label,
- const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
- dbgs() << Label << " " << Q.getName() << " ";
- if (P.isValid())
- dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
- << P.getUnitInc() << " ";
- else
- dbgs() << " ";
- dbgs() << "cost(" << Cost << ")\t";
- DAG->dumpNode(*SU);
-}
-
-// Very detailed queue dump, to be used with higher verbosity levels.
-void ConvergingVLIWScheduler::readyQueueVerboseDump(
- const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
- ReadyQueue &Q) {
- RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
-
- dbgs() << ">>> " << Q.getName() << "\n";
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
- RegPressureDelta RPDelta;
- TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
- DAG->getRegionCriticalPSets(),
- DAG->getRegPressure().MaxSetPressure);
- std::stringstream dbgstr;
- dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
- dbgs() << dbgstr.str();
- SchedulingCost(Q, *I, Candidate, RPDelta, true);
- dbgs() << "\t";
- (*I)->getInstr()->dump();
- }
- dbgs() << "\n";
-}
-#endif
-
-/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
-/// of SU, return true (we may have duplicates)
-static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
- if (SU->NumPredsLeft == 0)
- return false;
-
- for (auto &Pred : SU->Preds) {
- // We found an available, but not scheduled, predecessor.
- if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
- return false;
- }
-
- return true;
-}
-
-/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
-/// of SU, return true (we may have duplicates)
-static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
- if (SU->NumSuccsLeft == 0)
- return false;
-
- for (auto &Succ : SU->Succs) {
- // We found an available, but not scheduled, successor.
- if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
- return false;
- }
- return true;
-}
-
-/// Check if the instruction changes the register pressure of a register in the
-/// high pressure set. The function returns a negative value if the pressure
-/// decreases and a positive value is the pressure increases. If the instruction
-/// doesn't use a high pressure register or doesn't change the register
-/// pressure, then return 0.
-int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
- PressureDiff &PD = DAG->getPressureDiff(SU);
- for (auto &P : PD) {
- if (!P.isValid())
- continue;
- // The pressure differences are computed bottom-up, so the comparision for
- // an increase is positive in the bottom direction, but negative in the
- // top-down direction.
- if (HighPressureSets[P.getPSet()])
- return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
- }
- return 0;
-}
-
-// Constants used to denote relative importance of
-// heuristic components for cost computation.
-static const unsigned PriorityOne = 200;
-static const unsigned PriorityTwo = 50;
-static const unsigned PriorityThree = 75;
-static const unsigned ScaleTwo = 10;
-
-/// Single point to compute overall scheduling cost.
-/// TODO: More heuristics will be used soon.
-int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
- SchedCandidate &Candidate,
- RegPressureDelta &Delta,
- bool verbose) {
- // Initial trivial priority.
- int ResCount = 1;
-
- // Do not waste time on a node that is already scheduled.
if (!SU || SU->isScheduled)
return ResCount;
- LLVM_DEBUG(if (verbose) dbgs()
- << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
- // Forced priority is high.
- if (SU->isScheduleHigh) {
- ResCount += PriorityOne;
- LLVM_DEBUG(dbgs() << "H|");
- }
-
- unsigned IsAvailableAmt = 0;
- // Critical path first.
- if (Q.getID() == TopQID) {
- if (Top.isLatencyBound(SU)) {
- LLVM_DEBUG(if (verbose) dbgs() << "LB|");
- ResCount += (SU->getHeight() * ScaleTwo);
- }
-
- LLVM_DEBUG(if (verbose) {
- std::stringstream dbgstr;
- dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
- dbgs() << dbgstr.str();
- });
-
- // If resources are available for it, multiply the
- // chance of scheduling.
- if (Top.ResourceModel->isResourceAvailable(SU, true)) {
- IsAvailableAmt = (PriorityTwo + PriorityThree);
- ResCount += IsAvailableAmt;
- LLVM_DEBUG(if (verbose) dbgs() << "A|");
- } else
- LLVM_DEBUG(if (verbose) dbgs() << " |");
- } else {
- if (Bot.isLatencyBound(SU)) {
- LLVM_DEBUG(if (verbose) dbgs() << "LB|");
- ResCount += (SU->getDepth() * ScaleTwo);
- }
-
- LLVM_DEBUG(if (verbose) {
- std::stringstream dbgstr;
- dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
- dbgs() << dbgstr.str();
- });
-
- // If resources are available for it, multiply the
- // chance of scheduling.
- if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
- IsAvailableAmt = (PriorityTwo + PriorityThree);
- ResCount += IsAvailableAmt;
- LLVM_DEBUG(if (verbose) dbgs() << "A|");
- } else
- LLVM_DEBUG(if (verbose) dbgs() << " |");
- }
-
- unsigned NumNodesBlocking = 0;
- if (Q.getID() == TopQID) {
- // How many SUs does it block from scheduling?
- // Look at all of the successors of this node.
- // Count the number of nodes that
- // this node is the sole unscheduled node for.
- if (Top.isLatencyBound(SU))
- for (const SDep &SI : SU->Succs)
- if (isSingleUnscheduledPred(SI.getSUnit(), SU))
- ++NumNodesBlocking;
- } else {
- // How many unscheduled predecessors block this node?
- if (Bot.isLatencyBound(SU))
- for (const SDep &PI : SU->Preds)
- if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
- ++NumNodesBlocking;
- }
- ResCount += (NumNodesBlocking * ScaleTwo);
-
- LLVM_DEBUG(if (verbose) {
- std::stringstream dbgstr;
- dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
- dbgs() << dbgstr.str();
- });
-
- // Factor in reg pressure as a heuristic.
- if (!IgnoreBBRegPressure) {
- // Decrease priority by the amount that register pressure exceeds the limit.
- ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
- // Decrease priority if register pressure exceeds the limit.
- ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
- // Decrease priority slightly if register pressure would increase over the
- // current maximum.
- ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
- // If there are register pressure issues, then we remove the value added for
- // the instruction being available. The rationale is that we really don't
- // want to schedule an instruction that causes a spill.
- if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
- (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
- Delta.CurrentMax.getUnitInc()))
- ResCount -= IsAvailableAmt;
- LLVM_DEBUG(if (verbose) {
- dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
- << Delta.CriticalMax.getUnitInc() << "/"
- << Delta.CurrentMax.getUnitInc() << ")|";
- });
- }
-
- // Give a little extra priority to a .cur instruction if there is a resource
- // available for it.
auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
auto &QII = *QST.getInstrInfo();
if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
@@ -698,303 +66,5 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
}
}
- // Give preference to a zero latency instruction if the dependent
- // instruction is in the current packet.
- if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
- for (const SDep &PI : SU->Preds) {
- if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
- PI.getLatency() == 0 &&
- Top.ResourceModel->isInPacket(PI.getSUnit())) {
- ResCount += PriorityThree;
- LLVM_DEBUG(if (verbose) dbgs() << "Z|");
- }
- }
- } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
- for (const SDep &SI : SU->Succs) {
- if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
- SI.getLatency() == 0 &&
- Bot.ResourceModel->isInPacket(SI.getSUnit())) {
- ResCount += PriorityThree;
- LLVM_DEBUG(if (verbose) dbgs() << "Z|");
- }
- }
- }
-
- // If the instruction has a non-zero latency dependence with an instruction in
- // the current packet, then it should not be scheduled yet. The case occurs
- // when the dependent instruction is scheduled in a new packet, so the
- // scheduler updates the current cycle and pending instructions become
- // available.
- if (CheckEarlyAvail) {
- if (Q.getID() == TopQID) {
- for (const auto &PI : SU->Preds) {
- if (PI.getLatency() > 0 &&
- Top.ResourceModel->isInPacket(PI.getSUnit())) {
- ResCount -= PriorityOne;
- LLVM_DEBUG(if (verbose) dbgs() << "D|");
- }
- }
- } else {
- for (const auto &SI : SU->Succs) {
- if (SI.getLatency() > 0 &&
- Bot.ResourceModel->isInPacket(SI.getSUnit())) {
- ResCount -= PriorityOne;
- LLVM_DEBUG(if (verbose) dbgs() << "D|");
- }
- }
- }
- }
-
- LLVM_DEBUG(if (verbose) {
- std::stringstream dbgstr;
- dbgstr << "Total " << std::setw(4) << ResCount << ")";
- dbgs() << dbgstr.str();
- });
-
return ResCount;
}
-
-/// Pick the best candidate from the top queue.
-///
-/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
-/// DAG building. To adjust for the current scheduling location we need to
-/// maintain the number of vreg uses remaining to be top-scheduled.
-ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
-pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate) {
- ReadyQueue &Q = Zone.Available;
- LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
- readyQueueVerboseDump(RPTracker, Candidate, Q);
- else Q.dump(););
-
- // getMaxPressureDelta temporarily modifies the tracker.
- RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
-
- // BestSU remains NULL if no top candidates beat the best existing candidate.
- CandResult FoundCandidate = NoCand;
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
- RegPressureDelta RPDelta;
- TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
- DAG->getRegionCriticalPSets(),
- DAG->getRegPressure().MaxSetPressure);
-
- int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
-
- // Initialize the candidate if needed.
- if (!Candidate.SU) {
- LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = NodeOrder;
- continue;
- }
-
- // Choose node order for negative cost candidates. There is no good
- // candidate in this case.
- if (CurrentCost < 0 && Candidate.SCost < 0) {
- if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
- || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
- LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = NodeOrder;
- }
- continue;
- }
-
- // Best cost.
- if (CurrentCost > Candidate.SCost) {
- LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = BestCost;
- continue;
- }
-
- // Choose an instruction that does not depend on an artificial edge.
- unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
- unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
- if (CurrWeak != CandWeak) {
- if (CurrWeak < CandWeak) {
- LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = Weak;
- }
- continue;
- }
-
- if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
- unsigned CurrSize, CandSize;
- if (Q.getID() == TopQID) {
- CurrSize = (*I)->Succs.size();
- CandSize = Candidate.SU->Succs.size();
- } else {
- CurrSize = (*I)->Preds.size();
- CandSize = Candidate.SU->Preds.size();
- }
- if (CurrSize > CandSize) {
- LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = BestCost;
- }
- // Keep the old candidate if it's a better candidate. That is, don't use
- // the subsequent tie breaker.
- if (CurrSize != CandSize)
- continue;
- }
-
- // Tie breaker.
- // To avoid scheduling indeterminism, we need a tie breaker
- // for the case when cost is identical for two nodes.
- if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
- if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
- || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
- LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = NodeOrder;
- continue;
- }
- }
-
- // Fall through to original instruction order.
- // Only consider node order if Candidate was chosen from this Q.
- if (FoundCandidate == NoCand)
- continue;
- }
- return FoundCandidate;
-}
-
-/// Pick the best candidate node from either the top or bottom queue.
-SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
- // Schedule as far as possible in the direction of no choice. This is most
- // efficient, but also provides the best heuristics for CriticalPSets.
- if (SUnit *SU = Bot.pickOnlyChoice()) {
- LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
- IsTopNode = false;
- return SU;
- }
- if (SUnit *SU = Top.pickOnlyChoice()) {
- LLVM_DEBUG(dbgs() << "Picked only Top\n");
- IsTopNode = true;
- return SU;
- }
- SchedCandidate BotCand;
- // Prefer bottom scheduling when heuristics are silent.
- CandResult BotResult = pickNodeFromQueue(Bot,
- DAG->getBotRPTracker(), BotCand);
- assert(BotResult != NoCand && "failed to find the first candidate");
-
- // If either Q has a single candidate that provides the least increase in
- // Excess pressure, we can immediately schedule from that Q.
- //
- // RegionCriticalPSets summarizes the pressure within the scheduled region and
- // affects picking from either Q. If scheduling in one direction must
- // increase pressure for one of the excess PSets, then schedule in that
- // direction first to provide more freedom in the other direction.
- if (BotResult == SingleExcess || BotResult == SingleCritical) {
- LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
- IsTopNode = false;
- return BotCand.SU;
- }
- // Check if the top Q has a better candidate.
- SchedCandidate TopCand;
- CandResult TopResult = pickNodeFromQueue(Top,
- DAG->getTopRPTracker(), TopCand);
- assert(TopResult != NoCand && "failed to find the first candidate");
-
- if (TopResult == SingleExcess || TopResult == SingleCritical) {
- LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
- IsTopNode = true;
- return TopCand.SU;
- }
- // If either Q has a single candidate that minimizes pressure above the
- // original region's pressure pick it.
- if (BotResult == SingleMax) {
- LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
- IsTopNode = false;
- return BotCand.SU;
- }
- if (TopResult == SingleMax) {
- LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
- IsTopNode = true;
- return TopCand.SU;
- }
- if (TopCand.SCost > BotCand.SCost) {
- LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
- IsTopNode = true;
- return TopCand.SU;
- }
- // Otherwise prefer the bottom candidate in node order.
- LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
- IsTopNode = false;
- return BotCand.SU;
-}
-
-/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
-SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
- if (DAG->top() == DAG->bottom()) {
- assert(Top.Available.empty() && Top.Pending.empty() &&
- Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
- return nullptr;
- }
- SUnit *SU;
- if (ForceTopDown) {
- SU = Top.pickOnlyChoice();
- if (!SU) {
- SchedCandidate TopCand;
- CandResult TopResult =
- pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
- assert(TopResult != NoCand && "failed to find the first candidate");
- (void)TopResult;
- SU = TopCand.SU;
- }
- IsTopNode = true;
- } else if (ForceBottomUp) {
- SU = Bot.pickOnlyChoice();
- if (!SU) {
- SchedCandidate BotCand;
- CandResult BotResult =
- pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
- assert(BotResult != NoCand && "failed to find the first candidate");
- (void)BotResult;
- SU = BotCand.SU;
- }
- IsTopNode = false;
- } else {
- SU = pickNodeBidrectional(IsTopNode);
- }
- if (SU->isTopReady())
- Top.removeReady(SU);
- if (SU->isBottomReady())
- Bot.removeReady(SU);
-
- LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
- << " Scheduling instruction in cycle "
- << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
- << reportPackets() << ")\n";
- DAG->dumpNode(*SU));
- return SU;
-}
-
-/// Update the scheduler's state after scheduling a node. This is the same node
-/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
-/// to update it's state based on the current cycle before MachineSchedStrategy
-/// does.
-void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
- if (IsTopNode) {
- Top.bumpNode(SU);
- SU->TopReadyCycle = Top.CurrCycle;
- } else {
- Bot.bumpNode(SU);
- SU->BotReadyCycle = Bot.CurrCycle;
- }
-}
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
index fb0a7abd339b..3d8f557dc787 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -13,261 +13,28 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include <algorithm>
-#include <cassert>
-#include <limits>
-#include <memory>
-#include <vector>
+#include "llvm/CodeGen/VLIWMachineScheduler.h"
namespace llvm {
class SUnit;
-class VLIWResourceModel {
- /// ResourcesModel - Represents VLIW state.
- /// Not limited to VLIW targets per se, but assumes
- /// definition of DFA by a target.
- DFAPacketizer *ResourcesModel;
-
- const TargetSchedModel *SchedModel;
-
- /// Local packet/bundle model. Purely
- /// internal to the MI schedulre at the time.
- std::vector<SUnit *> Packet;
-
- /// Total packets created.
- unsigned TotalPackets = 0;
-
+class HexagonVLIWResourceModel : public VLIWResourceModel {
public:
- VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM)
- : SchedModel(SM) {
- ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI);
-
- // This hard requirement could be relaxed,
- // but for now do not let it proceed.
- assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
-
- Packet.resize(SchedModel->getIssueWidth());
- Packet.clear();
- ResourcesModel->clearResources();
- }
-
- ~VLIWResourceModel() {
- delete ResourcesModel;
- }
-
- void resetPacketState() {
- Packet.clear();
- }
-
- void resetDFA() {
- ResourcesModel->clearResources();
- }
-
- void reset() {
- Packet.clear();
- ResourcesModel->clearResources();
- }
-
- bool isResourceAvailable(SUnit *SU, bool IsTop);
- bool reserveResources(SUnit *SU, bool IsTop);
- unsigned getTotalPackets() const { return TotalPackets; }
- bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); }
+ using VLIWResourceModel::VLIWResourceModel;
+ bool hasDependence(const SUnit *SUd, const SUnit *SUu) override;
};
-/// Extend the standard ScheduleDAGMI to provide more context and override the
-/// top-level schedule() driver.
-class VLIWMachineScheduler : public ScheduleDAGMILive {
-public:
- VLIWMachineScheduler(MachineSchedContext *C,
- std::unique_ptr<MachineSchedStrategy> S)
- : ScheduleDAGMILive(C, std::move(S)) {}
-
- /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
- /// time to do some work.
- void schedule() override;
-
- RegisterClassInfo *getRegClassInfo() { return RegClassInfo; }
- int getBBSize() { return BB->size(); }
-};
-
-//===----------------------------------------------------------------------===//
-// ConvergingVLIWScheduler - Implementation of the standard
-// MachineSchedStrategy.
-//===----------------------------------------------------------------------===//
-
-/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
-/// to balance the schedule.
-class ConvergingVLIWScheduler : public MachineSchedStrategy {
- /// Store the state used by ConvergingVLIWScheduler heuristics, required
- /// for the lifetime of one invocation of pickNode().
- struct SchedCandidate {
- // The best SUnit candidate.
- SUnit *SU = nullptr;
-
- // Register pressure values for the best candidate.
- RegPressureDelta RPDelta;
-
- // Best scheduling cost.
- int SCost = 0;
-
- SchedCandidate() = default;
- };
- /// Represent the type of SchedCandidate found within a single queue.
- enum CandResult {
- NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
- BestCost, Weak};
-
- /// Each Scheduling boundary is associated with ready queues. It tracks the
- /// current cycle in whichever direction at has moved, and maintains the state
- /// of "hazards" and other interlocks at the current cycle.
- struct VLIWSchedBoundary {
- VLIWMachineScheduler *DAG = nullptr;
- const TargetSchedModel *SchedModel = nullptr;
-
- ReadyQueue Available;
- ReadyQueue Pending;
- bool CheckPending = false;
-
- ScheduleHazardRecognizer *HazardRec = nullptr;
- VLIWResourceModel *ResourceModel = nullptr;
-
- unsigned CurrCycle = 0;
- unsigned IssueCount = 0;
- unsigned CriticalPathLength = 0;
-
- /// MinReadyCycle - Cycle of the soonest available instruction.
- unsigned MinReadyCycle = std::numeric_limits<unsigned>::max();
-
- // Remember the greatest min operand latency.
- unsigned MaxMinLatency = 0;
-
- /// Pending queues extend the ready queues with the same ID and the
- /// PendingFlag set.
- VLIWSchedBoundary(unsigned ID, const Twine &Name)
- : Available(ID, Name+".A"),
- Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P") {}
-
- ~VLIWSchedBoundary() {
- delete ResourceModel;
- delete HazardRec;
- }
-
- void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) {
- DAG = dag;
- SchedModel = smodel;
- CurrCycle = 0;
- IssueCount = 0;
- // Initialize the critical path length limit, which used by the scheduling
- // cost model to determine the value for scheduling an instruction. We use
- // a slightly different heuristic for small and large functions. For small
- // functions, it's important to use the height/depth of the instruction.
- // For large functions, prioritizing by height or depth increases spills.
- CriticalPathLength = DAG->getBBSize() / SchedModel->getIssueWidth();
- if (DAG->getBBSize() < 50)
- // We divide by two as a cheap and simple heuristic to reduce the
- // critcal path length, which increases the priority of using the graph
- // height/depth in the scheduler's cost computation.
- CriticalPathLength >>= 1;
- else {
- // For large basic blocks, we prefer a larger critical path length to
- // decrease the priority of using the graph height/depth.
- unsigned MaxPath = 0;
- for (auto &SU : DAG->SUnits)
- MaxPath = std::max(MaxPath, isTop() ? SU.getHeight() : SU.getDepth());
- CriticalPathLength = std::max(CriticalPathLength, MaxPath) + 1;
- }
- }
-
- bool isTop() const {
- return Available.getID() == ConvergingVLIWScheduler::TopQID;
- }
-
- bool checkHazard(SUnit *SU);
-
- void releaseNode(SUnit *SU, unsigned ReadyCycle);
-
- void bumpCycle();
-
- void bumpNode(SUnit *SU);
-
- void releasePending();
-
- void removeReady(SUnit *SU);
-
- SUnit *pickOnlyChoice();
-
- bool isLatencyBound(SUnit *SU) {
- if (CurrCycle >= CriticalPathLength)
- return true;
- unsigned PathLength = isTop() ? SU->getHeight() : SU->getDepth();
- return CriticalPathLength - CurrCycle <= PathLength;
- }
- };
-
- VLIWMachineScheduler *DAG = nullptr;
- const TargetSchedModel *SchedModel = nullptr;
-
- // State of the top and bottom scheduled instruction boundaries.
- VLIWSchedBoundary Top;
- VLIWSchedBoundary Bot;
-
- /// List of pressure sets that have a high pressure level in the region.
- std::vector<bool> HighPressureSets;
-
-public:
- /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
- enum {
- TopQID = 1,
- BotQID = 2,
- LogMaxQID = 2
- };
-
- ConvergingVLIWScheduler() : Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
-
- void initialize(ScheduleDAGMI *dag) override;
-
- SUnit *pickNode(bool &IsTopNode) override;
-
- void schedNode(SUnit *SU, bool IsTopNode) override;
-
- void releaseTopNode(SUnit *SU) override;
-
- void releaseBottomNode(SUnit *SU) override;
-
- unsigned reportPackets() {
- return Top.ResourceModel->getTotalPackets() +
- Bot.ResourceModel->getTotalPackets();
- }
-
+class HexagonConvergingVLIWScheduler : public ConvergingVLIWScheduler {
protected:
- SUnit *pickNodeBidrectional(bool &IsTopNode);
-
- int pressureChange(const SUnit *SU, bool isBotUp);
-
- int SchedulingCost(ReadyQueue &Q,
- SUnit *SU, SchedCandidate &Candidate,
- RegPressureDelta &Delta, bool verbose);
-
- CandResult pickNodeFromQueue(VLIWSchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate);
-#ifndef NDEBUG
- void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
- int Cost, PressureChange P = PressureChange());
-
- void readyQueueVerboseDump(const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate, ReadyQueue &Q);
-#endif
+ VLIWResourceModel *
+ createVLIWResourceModel(const TargetSubtargetInfo &STI,
+ const TargetSchedModel *SchedModel) const override;
+ int SchedulingCost(ReadyQueue &Q, SUnit *SU, SchedCandidate &Candidate,
+ RegPressureDelta &Delta, bool verbose) override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td
index 11f8af7c41a0..afd63d6d4aa7 100644
--- a/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -572,3 +572,14 @@ defm PS_storerd : NewCircularStore<DoubleRegs, WordAccess>;
// __builtin_trap.
let hasSideEffects = 1, isPseudo = 1, isCodeGenOnly = 1, isSolo = 1 in
def PS_crash: InstHexagon<(outs), (ins), "", [], "", PSEUDO, TypePSEUDO>;
+
+// This is the actual trap1 instruction from before v65. It is defined here
+// because it is no longer included in DepInstrInfo.td.
+def PS_trap1 : HInst<(outs), (ins u8_0Imm:$Ii), "trap1(#$Ii)", tc_53c851ab,
+ TypeJ>, Enc_a51a9a, Requires<[HasPreV65]> {
+ let Inst{1-0} = 0b00;
+ let Inst{7-5} = 0b000;
+ let Inst{13-13} = 0b0;
+ let Inst{31-16} = 0b0101010010000000;
+}
+
diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td
index 88d775f16a7f..931578c9e78d 100644
--- a/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -69,3 +69,4 @@ include "HexagonScheduleV66.td"
include "HexagonScheduleV67.td"
include "HexagonScheduleV67T.td"
include "HexagonScheduleV68.td"
+include "HexagonScheduleV69.td"
diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV69.td b/llvm/lib/Target/Hexagon/HexagonScheduleV69.td
new file mode 100644
index 000000000000..ddd246866e20
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonScheduleV69.td
@@ -0,0 +1,40 @@
+//=-HexagonScheduleV69.td - HexagonV69 Scheduling Definitions *- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-Exception
+//
+//
+//===----------------------------------------------------------------------===//
+
+//
+// ScalarItin and HVXItin contain some old itineraries
+// still used by a handful of instructions. Hopefully, we will be able
+// to get rid of them soon.
+def HexagonV69ItinList : DepScalarItinV69, ScalarItin,
+ DepHVXItinV69, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV69_list, ScalarItin_list,
+ DepHVXItinV69_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV69 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV69ItinList.ItinList>;
+
+def HexagonModelV69 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV69;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V69 Resource Definitions -
+//===----------------------------------------------------------------------===//
+
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index ecb2f88d8096..08bb4580b585 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -75,6 +75,10 @@ static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict",
cl::Hidden, cl::ZeroOrMore, cl::init(true),
cl::desc("Enable checking for cache bank conflicts"));
+static cl::opt<bool> EnableV68FloatCodeGen(
+ "force-hvx-float", cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable the code-generation for vector float instructions on v68."));
+
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
: HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
@@ -103,13 +107,71 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
UseAudioOps = false;
UseLongCalls = false;
- UseBSBScheduling = hasV60Ops() && EnableBSBSched;
+ SubtargetFeatures Features(FS);
+
+ // Turn on QFloat if the HVX version is v68+.
+ // The function ParseSubtargetFeatures sets the feature bits and initializes
+ // the subtarget's variables all in one step, so there isn't a good way to
+ // preprocess the feature string, other than by tinkering with it directly.
+ auto IsQFloatFS = [](StringRef F) {
+ return F == "+hvx-qfloat" || F == "-hvx-qfloat";
+ };
+ if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
+ auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
+ for (StringRef F : llvm::reverse(Features.getFeatures())) {
+ if (F.startswith("+hvxv"))
+ return F;
+ }
+ for (StringRef F : llvm::reverse(Features.getFeatures())) {
+ if (F == "-hvx")
+ return StringRef();
+ if (F.startswith("+hvx") || F == "-hvx")
+ return F.take_front(4); // Return "+hvx" or "-hvx".
+ }
+ return StringRef();
+ };
+
+ bool AddQFloat = false;
+ StringRef HvxVer = getHvxVersion(FS);
+ if (HvxVer.startswith("+hvxv")) {
+ int Ver = 0;
+ if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
+ AddQFloat = true;
+ } else if (HvxVer == "+hvx") {
+ if (hasV68Ops())
+ AddQFloat = true;
+ }
- ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
+ if (AddQFloat)
+ Features.AddFeature("+hvx-qfloat");
+ }
+
+ std::string FeatureString = Features.getString();
+ ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);
+
+ // Enable float code generation only if the flag(s) are set and
+ // the feature is enabled. v68 is guarded by additional flags.
+ bool GreaterThanV68 = false;
+ if (useHVXV69Ops())
+ GreaterThanV68 = true;
+
+ // Support for deprecated qfloat/ieee codegen flags
+ if (!GreaterThanV68) {
+ if (EnableV68FloatCodeGen)
+ UseHVXFloatingPoint = true;
+ } else {
+ UseHVXFloatingPoint = true;
+ }
+
+ if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
+ LLVM_DEBUG(
+ dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");
if (OverrideLongCalls.getPosition())
UseLongCalls = OverrideLongCalls;
+ UseBSBScheduling = hasV60Ops() && EnableBSBSched;
+
if (isTinyCore()) {
// Tiny core has a single thread, so back-to-back scheduling is enabled by
// default.
@@ -117,10 +179,10 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
UseBSBScheduling = false;
}
- FeatureBitset Features = getFeatureBits();
+ FeatureBitset FeatureBits = getFeatureBits();
if (HexagonDisableDuplex)
- setFeatureBits(Features.reset(Hexagon::FeatureDuplex));
- setFeatureBits(Hexagon_MC::completeHVXFeatures(Features));
+ setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
+ setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));
return *this;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index a4f2e159bf4b..e4f375440be1 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -56,6 +56,10 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool UseSmallData = false;
bool UseUnsafeMath = false;
bool UseZRegOps = false;
+ bool UseHVXIEEEFPOps = false;
+ bool UseHVXQFloatOps = false;
+ bool UseHVXFloatingPoint = false;
+ bool UseCabac = false;
bool HasPreV65 = false;
bool HasMemNoShuf = false;
@@ -138,6 +142,8 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+ bool isXRaySupported() const override { return true; }
+
bool hasV5Ops() const {
return getHexagonArchVersion() >= Hexagon::ArchEnum::V5;
}
@@ -186,6 +192,12 @@ public:
bool hasV68OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V68;
}
+ bool hasV69Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V69;
+ }
+ bool hasV69OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V69;
+ }
bool useAudioOps() const { return UseAudioOps; }
bool useCompound() const { return UseCompound; }
@@ -197,10 +209,16 @@ public:
bool useSmallData() const { return UseSmallData; }
bool useUnsafeMath() const { return UseUnsafeMath; }
bool useZRegOps() const { return UseZRegOps; }
+ bool useCabac() const { return UseCabac; }
bool isTinyCore() const { return HexagonProcFamily == TinyCore; }
bool isTinyCoreWithDuplex() const { return isTinyCore() && EnableDuplex; }
+ bool useHVXIEEEFPOps() const { return UseHVXIEEEFPOps && useHVXOps(); }
+ bool useHVXQFloatOps() const {
+ return UseHVXQFloatOps && HexagonHVXVersion >= Hexagon::ArchEnum::V68;
+ }
+ bool useHVXFloatingPoint() const { return UseHVXFloatingPoint; }
bool useHVXOps() const {
return HexagonHVXVersion > Hexagon::ArchEnum::NoArch;
}
@@ -222,6 +240,9 @@ public:
bool useHVXV68Ops() const {
return HexagonHVXVersion >= Hexagon::ArchEnum::V68;
}
+ bool useHVXV69Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V69;
+ }
bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }
@@ -281,7 +302,11 @@ public:
}
ArrayRef<MVT> getHVXElementTypes() const {
- static MVT Types[] = { MVT::i8, MVT::i16, MVT::i32 };
+ static MVT Types[] = {MVT::i8, MVT::i16, MVT::i32};
+ static MVT TypesV68[] = {MVT::i8, MVT::i16, MVT::i32, MVT::f16, MVT::f32};
+
+ if (useHVXV68Ops() && useHVXFloatingPoint())
+ return makeArrayRef(TypesV68);
return makeArrayRef(Types);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 66de698182d7..fcf829b522cc 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -21,6 +21,7 @@
#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/VLIWMachineScheduler.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
@@ -120,8 +121,8 @@ extern "C" int HexagonTargetMachineModule;
int HexagonTargetMachineModule = 0;
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
- ScheduleDAGMILive *DAG =
- new VLIWMachineScheduler(C, std::make_unique<ConvergingVLIWScheduler>());
+ ScheduleDAGMILive *DAG = new VLIWMachineScheduler(
+ C, std::make_unique<HexagonConvergingVLIWScheduler>());
DAG->addMutation(std::make_unique<HexagonSubtarget::UsrOverflowMutation>());
DAG->addMutation(std::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
DAG->addMutation(std::make_unique<HexagonSubtarget::CallMutation>());
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 1d325553f45a..85ec0cdcd8f0 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -294,7 +294,7 @@ bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
bool Avail = ResourceTracker->canReserveResources(*ExtMI);
if (Reserve && Avail)
ResourceTracker->reserveResources(*ExtMI);
- MF.DeleteMachineInstr(ExtMI);
+ MF.deleteMachineInstr(ExtMI);
return Avail;
}
@@ -890,7 +890,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
const MCInstrDesc &D = HII->get(NewOpcode);
MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI);
- MF.DeleteMachineInstr(NewMI);
+ MF.deleteMachineInstr(NewMI);
if (!ResourcesAvailable)
return false;
@@ -1082,6 +1082,11 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
if (HII->isSolo(MI))
return true;
+ if (MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_ENTER ||
+ MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_EXIT ||
+ MI.getOpcode() == Hexagon::PATCHABLE_TAIL_CALL)
+ return true;
+
if (MI.getOpcode() == Hexagon::A2_nop)
return true;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index ea2798a3b44e..21386a91c7b3 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -536,7 +536,7 @@ auto AlignVectors::createAddressGroups() -> bool {
erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
// Remove groups that don't use HVX types.
erase_if(AddrGroups, [&](auto &G) {
- return !llvm::any_of(
+ return llvm::none_of(
G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
});
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 4125566bc58a..c9a1781a4543 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -154,9 +154,8 @@ namespace HexagonII {
PrefersSlot3Pos = 57,
PrefersSlot3Mask = 0x1,
- // v65
- HasTmpDstPos = 60,
- HasTmpDstMask = 0x1,
+ HasHvxTmpPos = 60,
+ HasHvxTmpMask = 0x1,
CVINewPos = 62,
CVINewMask = 0x1,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index fee1acdbbe8a..96c2965296ca 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -98,6 +98,10 @@ void HexagonMCChecker::init(MCInst const &MCI) {
for (unsigned i = 0; i < MCID.getNumImplicitUses(); ++i)
initReg(MCI, MCID.getImplicitUses()[i], PredReg, isTrue);
+ const bool IgnoreTmpDst = (HexagonMCInstrInfo::hasTmpDst(MCII, MCI) ||
+ HexagonMCInstrInfo::hasHvxTmp(MCII, MCI)) &&
+ STI.getFeatureBits()[Hexagon::ArchV69];
+
// Get implicit register definitions.
if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
for (; *ImpDef; ++ImpDef) {
@@ -123,7 +127,7 @@ void HexagonMCChecker::init(MCInst const &MCI) {
HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
// Include implicit late predicates.
LatePreds.insert(R);
- else
+ else if (!IgnoreTmpDst)
Defs[R].insert(PredSense(PredReg, isTrue));
}
@@ -178,7 +182,7 @@ void HexagonMCChecker::init(MCInst const &MCI) {
// vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and
// destination registers with this instruction. same for vdeal(Vx,Vy,Rx)
Uses.insert(*SRI);
- else
+ else if (!IgnoreTmpDst)
Defs[*SRI].insert(PredSense(PredReg, isTrue));
}
}
@@ -227,9 +231,11 @@ bool HexagonMCChecker::check(bool FullCheck) {
bool chkAXOK = checkAXOK();
bool chkCofMax1 = checkCOFMax1();
bool chkHWLoop = checkHWLoop();
+ bool chkValidTmpDst = FullCheck ? checkValidTmpDst() : true;
bool chkLegalVecRegPair = checkLegalVecRegPair();
bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl &&
- chkAXOK && chkCofMax1 && chkHWLoop && chkLegalVecRegPair;
+ chkAXOK && chkCofMax1 && chkHWLoop && chkValidTmpDst &&
+ chkLegalVecRegPair;
return chk;
}
@@ -676,6 +682,32 @@ bool HexagonMCChecker::checkShuffle() {
return MCSDX.check();
}
+bool HexagonMCChecker::checkValidTmpDst() {
+ if (!STI.getFeatureBits()[Hexagon::ArchV69]) {
+ return true;
+ }
+ auto HasTmp = [&](MCInst const &I) {
+ return HexagonMCInstrInfo::hasTmpDst(MCII, I) ||
+ HexagonMCInstrInfo::hasHvxTmp(MCII, I);
+ };
+ unsigned HasTmpCount =
+ llvm::count_if(HexagonMCInstrInfo::bundleInstructions(MCII, MCB), HasTmp);
+
+ if (HasTmpCount > 1) {
+ reportError(
+ MCB.getLoc(),
+ "this packet has more than one HVX vtmp/.tmp destination instruction");
+
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB))
+ if (HasTmp(I))
+ reportNote(I.getLoc(),
+ "this is an HVX vtmp/.tmp destination instruction");
+
+ return false;
+ }
+ return true;
+}
+
void HexagonMCChecker::compoundRegisterMap(unsigned &Register) {
switch (Register) {
default:
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index 00afdb664ba5..dbd3d8ae45e6 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -99,6 +99,7 @@ class HexagonMCChecker {
bool checkHWLoop();
bool checkCOFMax1();
bool checkLegalVecRegPair();
+ bool checkValidTmpDst();
static void compoundRegisterMap(unsigned &);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index fa12fe1da448..68ccb20f4f15 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -939,10 +939,24 @@ bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
return (F >> HexagonII::PrefersSlot3Pos) & HexagonII::PrefersSlot3Mask;
}
-/// return true if instruction has hasTmpDst attribute.
bool HexagonMCInstrInfo::hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI) {
+ switch (MCI.getOpcode()) {
+ default:
+ return false;
+ case Hexagon::V6_vgathermh:
+ case Hexagon::V6_vgathermhq:
+ case Hexagon::V6_vgathermhw:
+ case Hexagon::V6_vgathermhwq:
+ case Hexagon::V6_vgathermw:
+ case Hexagon::V6_vgathermwq:
+ return true;
+ }
+ return false;
+}
+
+bool HexagonMCInstrInfo::hasHvxTmp(MCInstrInfo const &MCII, MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return (F >> HexagonII::HasTmpDstPos) & HexagonII::HasTmpDstMask;
+ return (F >> HexagonII::HasHvxTmpPos) & HexagonII::HasHvxTmpMask;
}
bool HexagonMCInstrInfo::requiresSlot(MCSubtargetInfo const &STI,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 7b3c079880f8..5c56db14798f 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -41,7 +41,8 @@ public:
namespace Hexagon {
-class PacketIterator {
+class PacketIterator : public std::iterator<std::forward_iterator_tag,
+ PacketIterator> {
MCInstrInfo const &MCII;
MCInst::const_iterator BundleCurrent;
MCInst::const_iterator BundleEnd;
@@ -188,6 +189,7 @@ bool hasImmExt(MCInst const &MCI);
bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
bool hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI);
+bool hasHvxTmp(MCInstrInfo const &MCII, MCInst const &MCI);
unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
int64_t minConstant(MCInst const &MCI, size_t Index);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index d832a756cb92..dfdddb50657c 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -80,6 +80,8 @@ cl::opt<bool> MV67T("mv67t", cl::Hidden, cl::desc("Build for Hexagon V67T"),
cl::init(false));
cl::opt<bool> MV68("mv68", cl::Hidden, cl::desc("Build for Hexagon V68"),
cl::init(false));
+cl::opt<bool> MV69("mv69", cl::Hidden, cl::desc("Build for Hexagon V69"),
+ cl::init(false));
cl::opt<Hexagon::ArchEnum>
EnableHVX("mhvx",
@@ -91,6 +93,7 @@ cl::opt<Hexagon::ArchEnum>
clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"),
clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"),
clEnumValN(Hexagon::ArchEnum::V68, "v68", "Build for HVX v68"),
+ clEnumValN(Hexagon::ArchEnum::V69, "v69", "Build for HVX v69"),
// Sentinel for no value specified.
clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
// Sentinel for flag not present.
@@ -101,6 +104,11 @@ static cl::opt<bool>
DisableHVX("mno-hvx", cl::Hidden,
cl::desc("Disable Hexagon Vector eXtensions"));
+static cl::opt<bool>
+ EnableHvxIeeeFp("mhvx-ieee-fp", cl::Hidden,
+ cl::desc("Enable HVX IEEE floating point extensions"));
+static cl::opt<bool> EnableHexagonCabac
+ ("mcabac", cl::desc("tbd"), cl::init(false));
static StringRef DefaultArch = "hexagonv60";
@@ -123,6 +131,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv67t";
if (MV68)
return "hexagonv68";
+ if (MV69)
+ return "hexagonv69";
return "";
}
@@ -371,6 +381,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V68:
Result.push_back("+hvxv68");
break;
+ case Hexagon::ArchEnum::V69:
+ Result.push_back("+hvxv69");
+ break;
case Hexagon::ArchEnum::Generic:{
Result.push_back(StringSwitch<StringRef>(CPU)
.Case("hexagonv60", "+hvxv60")
@@ -379,13 +392,19 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
.Case("hexagonv66", "+hvxv66")
.Case("hexagonv67", "+hvxv67")
.Case("hexagonv67t", "+hvxv67")
- .Case("hexagonv68", "+hvxv68"));
+ .Case("hexagonv68", "+hvxv68")
+ .Case("hexagonv69", "+hvxv69"));
break;
}
case Hexagon::ArchEnum::NoArch:
// Sentinel if -mhvx isn't specified
break;
}
+ if (EnableHvxIeeeFp)
+ Result.push_back("+hvx-ieee-fp");
+ if (EnableHexagonCabac)
+ Result.push_back("+cabac");
+
return join(Result.begin(), Result.end(), ",");
}
}
@@ -422,8 +441,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// turns on hvxvNN, corresponding to the existing ArchVNN.
FeatureBitset FB = S;
unsigned CpuArch = ArchV5;
- for (unsigned F : {ArchV68, ArchV67, ArchV66, ArchV65, ArchV62, ArchV60,
- ArchV55, ArchV5}) {
+ for (unsigned F : {ArchV69, ArchV68, ArchV67, ArchV66, ArchV65, ArchV62,
+ ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -438,7 +457,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
}
bool HasHvxVer = false;
for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
- ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68}) {
+ ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68,
+ ExtensionHVXV69}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -451,6 +471,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV69:
+ FB.set(ExtensionHVXV69);
+ LLVM_FALLTHROUGH;
case ArchV68:
FB.set(ExtensionHVXV68);
LLVM_FALLTHROUGH;
@@ -538,6 +561,7 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
{"hexagonv67", ELF::EF_HEXAGON_MACH_V67},
{"hexagonv67t", ELF::EF_HEXAGON_MACH_V67T},
{"hexagonv68", ELF::EF_HEXAGON_MACH_V68},
+ {"hexagonv69", ELF::EF_HEXAGON_MACH_V69},
};
auto F = ElfFlags.find(STI.getCPU());
diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td b/llvm/lib/Target/M68k/M68kInstrControl.td
index 708474726861..9f87833ab0e2 100644
--- a/llvm/lib/Target/M68k/M68kInstrControl.td
+++ b/llvm/lib/Target/M68k/M68kInstrControl.td
@@ -118,13 +118,13 @@ def SET#"p8"#cc : MxSccM<cc, MxType8.POp, MxType8.PPat, MxEncEAp_0, MxExtI16_0>;
/// 0 1 0 0 1 1 1 0 1 1 | MODE | REG
///------------------------------+---------+---------
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
-class MxJMP<MxOperand LOCOp, ComplexPattern LOCPat, MxEncEA EA, MxEncExt EXT>
+class MxJMP<MxOperand LOCOp, MxEncEA EA, MxEncExt EXT>
: MxInst<(outs), (ins LOCOp:$dst), "jmp\t$dst", [(brind iPTR:$dst)],
MxEncoding<EA.Reg, EA.DA, EA.Mode, MxBead2Bits<0b11>,
MxBead4Bits<0b1110>, MxBead4Bits<0b0100>,
EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>;
-def JMP32j : MxJMP<MxARI32, MxCP_ARI, MxEncEAj_0, MxExtEmpty>;
+def JMP32j : MxJMP<MxARI32, MxEncEAj_0, MxExtEmpty>;
// FIXME Support 16 bit indirect jump.
@@ -147,17 +147,17 @@ def JMP32j : MxJMP<MxARI32, MxCP_ARI, MxEncEAj_0, MxExtEmpty>;
/// 32-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $FF
/// --------------------------------------------------
let isBranch = 1, isTerminator = 1, Uses = [CCR] in
-class MxBcc<string cc, Operand TARGET, MxType TYPE, MxEncoding ENC = MxEncEmpty>
+class MxBcc<string cc, Operand TARGET, MxEncoding ENC = MxEncEmpty>
: MxInst<(outs), (ins TARGET:$dst), "b"#cc#"\t$dst", [], ENC>;
foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge",
"cs", "pl", "gt", "hi", "vc", "le", "vs"] in {
def B#cc#"8"
- : MxBcc<cc, MxBrTarget8, MxType8,
+ : MxBcc<cc, MxBrTarget8,
MxEncoding<MxBead8Disp<0>,
!cast<MxBead4Bits>("MxCC"#cc), MxBead4Bits<0x6>>>;
def B#cc#"16"
- : MxBcc<cc, MxBrTarget16, MxType16,
+ : MxBcc<cc, MxBrTarget16,
MxEncoding<MxBead4Bits<0x0>,
MxBead4Bits<0x0>, !cast<MxBead4Bits>("MxCC"#cc),
MxBead4Bits<0x6>, MxBead16Imm<0>>>;
@@ -179,13 +179,13 @@ def : Pat<(MxBrCond bb:$target, !cast<PatLeaf>("MxCOND"#cc), CCR),
/// 32-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $FF
/// -------------------------------------------------
let isBranch = 1, isTerminator = 1, isBarrier=1 in
-class MxBra<Operand TARGET, MxType TYPE, MxEncoding ENC = MxEncEmpty>
+class MxBra<Operand TARGET, MxEncoding ENC = MxEncEmpty>
: MxInst<(outs), (ins TARGET:$dst), "bra\t$dst", [], ENC>;
-def BRA8 : MxBra<MxBrTarget8, MxType8,
+def BRA8 : MxBra<MxBrTarget8,
MxEncoding<MxBead8Disp<0>, MxBead4Bits<0x0>,
MxBead4Bits<0x6>>>;
-def BRA16 : MxBra<MxBrTarget16, MxType16,
+def BRA16 : MxBra<MxBrTarget16,
MxEncoding<MxBead4Bits<0x0>, MxBead4Bits<0x0>,
MxBead4Bits<0x0>, MxBead4Bits<0x6>,
MxBead16Imm<0>>>;
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index 2a77a150f9aa..4ef9a567d453 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -189,8 +189,8 @@ bool MSP430FrameLowering::spillCalleeSavedRegisters(
MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
MFI->setCalleeSavedFrameSize(CSI.size() * 2);
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
+ for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ unsigned Reg = I.getReg();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
diff --git a/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index 203e05dde7ad..419f0ac1a8a7 100644
--- a/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -479,14 +479,12 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
// remove the use-soft-float attribute
static void removeUseSoftFloat(Function &F) {
- AttrBuilder B;
LLVM_DEBUG(errs() << "removing -use-soft-float\n");
- B.addAttribute("use-soft-float", "false");
- F.removeFnAttrs(B);
+ F.removeFnAttr("use-soft-float");
if (F.hasFnAttribute("use-soft-float")) {
LLVM_DEBUG(errs() << "still has -use-soft-float\n");
}
- F.addFnAttrs(B);
+ F.addFnAttr("use-soft-float", "false");
}
// This pass only makes sense when the underlying chip has floating point but
diff --git a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
index aa8e298fa759..4e9a23d077da 100644
--- a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -36,7 +36,7 @@
///
/// Regarding compact branch hazard prevention:
///
-/// Hazards handled: forbidden slots for MIPSR6.
+/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below.
///
/// A forbidden slot hazard occurs when a compact branch instruction is executed
/// and the adjacent instruction in memory is a control transfer instruction
@@ -160,7 +160,10 @@ private:
bool buildProperJumpMI(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Pos, DebugLoc DL);
void expandToLongBranch(MBBInfo &Info);
+ template <typename Pred, typename Safe>
+ bool handleSlot(Pred Predicate, Safe SafeInSlot);
bool handleForbiddenSlot();
+ bool handleFPUDelaySlot();
bool handlePossibleLongBranch();
const MipsSubtarget *STI;
@@ -738,30 +741,27 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
MBB.removeLiveIn(Mips::V0);
}
-bool MipsBranchExpansion::handleForbiddenSlot() {
- // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6.
- if (!STI->hasMips32r6() || STI->inMicroMipsMode())
- return false;
-
+template <typename Pred, typename Safe>
+bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) {
bool Changed = false;
for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
for (Iter I = FI->begin(); I != FI->end(); ++I) {
- // Forbidden slot hazard handling. Use lookahead over state.
- if (!TII->HasForbiddenSlot(*I))
+ // Delay slot hazard handling. Use lookahead over state.
+ if (!Predicate(*I))
continue;
- Iter Inst;
+ Iter IInSlot;
bool LastInstInFunction =
std::next(I) == FI->end() && std::next(FI) == MFp->end();
if (!LastInstInFunction) {
std::pair<Iter, bool> Res = getNextMachineInstr(std::next(I), &*FI);
LastInstInFunction |= Res.second;
- Inst = Res.first;
+ IInSlot = Res.first;
}
- if (LastInstInFunction || !TII->SafeInForbiddenSlot(*Inst)) {
+ if (LastInstInFunction || !SafeInSlot(*IInSlot, *I)) {
MachineBasicBlock::instr_iterator Iit = I->getIterator();
if (std::next(Iit) == FI->end() ||
@@ -778,6 +778,29 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
return Changed;
}
+bool MipsBranchExpansion::handleForbiddenSlot() {
+ // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6.
+ if (!STI->hasMips32r6() || STI->inMicroMipsMode())
+ return false;
+
+ return handleSlot(
+ [this](auto &I) -> bool { return TII->HasForbiddenSlot(I); },
+ [this](auto &IInSlot, auto &I) -> bool {
+ return TII->SafeInForbiddenSlot(IInSlot);
+ });
+}
+
+bool MipsBranchExpansion::handleFPUDelaySlot() {
+ // FPU delay slots are only defined for MIPS3 and below.
+ if (STI->hasMips32() || STI->hasMips4())
+ return false;
+
+ return handleSlot([this](auto &I) -> bool { return TII->HasFPUDelaySlot(I); },
+ [this](auto &IInSlot, auto &I) -> bool {
+ return TII->SafeInFPUDelaySlot(IInSlot, I);
+ });
+}
+
bool MipsBranchExpansion::handlePossibleLongBranch() {
if (STI->inMips16Mode() || !STI->enableLongBranchPass())
return false;
@@ -857,13 +880,16 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
// Run these two at least once
bool longBranchChanged = handlePossibleLongBranch();
bool forbiddenSlotChanged = handleForbiddenSlot();
+ bool fpuDelaySlotChanged = handleFPUDelaySlot();
- bool Changed = longBranchChanged || forbiddenSlotChanged;
+ bool Changed =
+ longBranchChanged || forbiddenSlotChanged || fpuDelaySlotChanged;
// Then run them alternatively while there are changes
while (forbiddenSlotChanged) {
longBranchChanged = handlePossibleLongBranch();
- if (!longBranchChanged)
+ fpuDelaySlotChanged = handleFPUDelaySlot();
+ if (!longBranchChanged && !fpuDelaySlotChanged)
break;
forbiddenSlotChanged = handleForbiddenSlot();
}
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 4f364ef6afc7..9377e83524e1 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -4121,7 +4121,7 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
case 'y': // Same as 'r'. Exists for compatibility.
case 'r':
- if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8 || VT == MVT::i1) {
if (Subtarget.inMips16Mode())
return std::make_pair(0U, &Mips::CPU16RegsRegClass);
return std::make_pair(0U, &Mips::GPR32RegClass);
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 94828a976695..2bf8562895d7 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -568,11 +568,60 @@ bool MipsInstrInfo::SafeInForbiddenSlot(const MachineInstr &MI) const {
return (MI.getDesc().TSFlags & MipsII::IsCTI) == 0;
}
+bool MipsInstrInfo::SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
+ const MachineInstr &FPUMI) const {
+ if (MIInSlot.isInlineAsm())
+ return false;
+
+ if (HasFPUDelaySlot(MIInSlot))
+ return false;
+
+ switch (MIInSlot.getOpcode()) {
+ case Mips::BC1F:
+ case Mips::BC1FL:
+ case Mips::BC1T:
+ case Mips::BC1TL:
+ return false;
+ }
+
+ for (const MachineOperand &Op : FPUMI.defs()) {
+ if (!Op.isReg())
+ continue;
+
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MIInSlot.readsWritesVirtualRegister(Op.getReg());
+
+ if (Reads || Writes)
+ return false;
+ }
+
+ return true;
+}
+
/// Predicate for distingushing instructions that have forbidden slots.
bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
return (MI.getDesc().TSFlags & MipsII::HasForbiddenSlot) != 0;
}
+/// Predicate for distinguishing instructions that have FPU delay slots.
+bool MipsInstrInfo::HasFPUDelaySlot(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case Mips::MTC1:
+ case Mips::MFC1:
+ case Mips::MTC1_D64:
+ case Mips::MFC1_D64:
+ case Mips::DMTC1:
+ case Mips::DMFC1:
+ case Mips::FCMP_S32:
+ case Mips::FCMP_D32:
+ case Mips::FCMP_D64:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
/// Return the number of bytes of code the specified instruction may be.
unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h
index c96ed202df30..46c1b73d512f 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -92,9 +92,16 @@ public:
/// Predicate to determine if an instruction can go in a forbidden slot.
bool SafeInForbiddenSlot(const MachineInstr &MI) const;
+ /// Predicate to determine if an instruction can go in an FPU delay slot.
+ bool SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
+ const MachineInstr &FPUMI) const;
+
/// Predicate to determine if an instruction has a forbidden slot.
bool HasForbiddenSlot(const MachineInstr &MI) const;
+ /// Predicate to determine if an instruction has an FPU delay slot.
+ bool HasFPUDelaySlot(const MachineInstr &MI) const;
+
/// Insert nop instruction when hazard condition is found
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index c35e67d6726f..16add48d4602 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1098,10 +1098,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << " .attribute(.managed)";
}
- if (GVar->getAlignment() == 0)
- O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
+ if (MaybeAlign A = GVar->getAlign())
+ O << " .align " << A->value();
else
- O << " .align " << GVar->getAlignment();
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
(ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
@@ -1290,10 +1290,10 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << ".";
emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
- if (GVar->getAlignment() == 0)
- O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
+ if (MaybeAlign A = GVar->getAlign())
+ O << " .align " << A->value();
else
- O << " .align " << GVar->getAlignment();
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
// Special case for i128
if (ETy->isIntegerTy(128)) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
index 1f3b4c9440d8..bf3c87df2e08 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -126,9 +126,9 @@ static void CombineCVTAToLocal(MachineInstr &Root) {
// Check if MRI has only one non dbg use, which is Root
if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
- Prev.eraseFromParentAndMarkDBGValuesForRemoval();
+ Prev.eraseFromParent();
}
- Root.eraseFromParentAndMarkDBGValuesForRemoval();
+ Root.eraseFromParent();
}
bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
@@ -157,7 +157,7 @@ bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
const auto &MRI = MF.getRegInfo();
if (MRI.use_empty(NRI->getFrameRegister(MF))) {
if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
- MI->eraseFromParentAndMarkDBGValuesForRemoval();
+ MI->eraseFromParent();
}
}
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 9e181d4052d6..ded922329ebf 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1576,6 +1576,16 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
std::swap(Operands[2], Operands[1]);
}
+ // Handle base mnemonic for atomic loads where the EH bit is zero.
+ if (Name == "lqarx" || Name == "ldarx" || Name == "lwarx" ||
+ Name == "lharx" || Name == "lbarx") {
+ if (Operands.size() != 5)
+ return false;
+ PPCOperand &EHOp = (PPCOperand &)*Operands[4];
+ if (EHOp.isU1Imm() && EHOp.getImm() == 0)
+ Operands.pop_back();
+ }
+
return false;
}
@@ -1745,7 +1755,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
- if (Op.isImm() && Op.getImm() == ImmVal)
+ if (Op.isU3Imm() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 22b948a83c34..d6e02d0d0862 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -368,6 +369,31 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
return new PPCInstPrinter(MAI, MII, MRI, T);
}
+namespace {
+
+class PPCMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ explicit PPCMCInstrAnalysis(const MCInstrInfo *Info)
+ : MCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ unsigned NumOps = Inst.getNumOperands();
+ if (NumOps == 0 ||
+ Info->get(Inst.getOpcode()).OpInfo[NumOps - 1].OperandType !=
+ MCOI::OPERAND_PCREL)
+ return false;
+ Target = Addr + Inst.getOperand(NumOps - 1).getImm() * Size;
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createPPCMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new PPCMCInstrAnalysis(Info);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
for (Target *T : {&getThePPC32Target(), &getThePPC32LETarget(),
&getThePPC64Target(), &getThePPC64LETarget()}) {
@@ -383,6 +409,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createPPCMCInstrAnalysis);
+
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 422bd11dca52..bbd5f5fd1941 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -219,6 +219,10 @@ def FeatureZeroMoveFusion:
SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true",
"Target supports move to SPR with branch fusion",
[FeatureFusion]>;
+def FeatureBack2BackFusion:
+ SubtargetFeature<"fuse-back2back", "HasBack2BackFusion", "true",
+ "Target supports general back to back fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 16e3b2b85c2e..f26c15667a0b 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -347,7 +347,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
// At the moment, all inline asm memory operands are a single register.
// In any case, the output of this routine should always be just one
// assembler operand.
-
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode,
raw_ostream &O) {
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
new file mode 100644
index 000000000000..38ed5f2e78e3
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -0,0 +1,1042 @@
+// Automatically generated file, do not edit!
+//
+// This file defines instruction list for general back2back fusion.
+//===----------------------------------------------------------------------===//
+FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
+ FUSION_OP_SET(ADD4,
+ ADD4O,
+ ADD4TLS,
+ ADD4_rec,
+ ADD8,
+ ADD8O,
+ ADD8TLS,
+ ADD8TLS_,
+ ADD8_rec,
+ ADDE,
+ ADDE8,
+ ADDE8O,
+ ADDEO,
+ ADDEX,
+ ADDEX8,
+ ADDI,
+ ADDI8,
+ ADDIC,
+ ADDIC8,
+ ADDIS,
+ ADDIS8,
+ ADDISdtprelHA32,
+ ADDIStocHA,
+ ADDIStocHA8,
+ ADDIdtprelL32,
+ ADDItlsldLADDR32,
+ ADDItocL,
+ ADDME,
+ ADDME8,
+ ADDME8O,
+ ADDMEO,
+ ADDZE,
+ ADDZE8,
+ ADDZE8O,
+ ADDZEO,
+ AND,
+ AND8,
+ AND8_rec,
+ ANDC,
+ ANDC8,
+ ANDC8_rec,
+ ANDC_rec,
+ ANDI8_rec,
+ ANDIS8_rec,
+ ANDIS_rec,
+ ANDI_rec,
+ AND_rec,
+ CMPB,
+ CMPB8,
+ CNTLZD,
+ CNTLZD_rec,
+ CNTLZW,
+ CNTLZW8,
+ CNTLZW8_rec,
+ CNTLZW_rec,
+ CNTTZD,
+ CNTTZD_rec,
+ CNTTZW,
+ CNTTZW8,
+ CNTTZW8_rec,
+ CNTTZW_rec,
+ EQV,
+ EQV8,
+ EQV8_rec,
+ EQV_rec,
+ EXTSB,
+ EXTSB8,
+ EXTSB8_32_64,
+ EXTSB8_rec,
+ EXTSB_rec,
+ EXTSH,
+ EXTSH8,
+ EXTSH8_32_64,
+ EXTSH8_rec,
+ EXTSH_rec,
+ EXTSW,
+ EXTSWSLI,
+ EXTSWSLI_32_64,
+ EXTSWSLI_32_64_rec,
+ EXTSWSLI_rec,
+ EXTSW_32,
+ EXTSW_32_64,
+ EXTSW_32_64_rec,
+ EXTSW_rec,
+ FABSD,
+ FABSS,
+ FCPSGND,
+ FCPSGNS,
+ FMR,
+ FNABSD,
+ FNABSS,
+ FNEGD,
+ FNEGS,
+ ISEL,
+ ISEL8,
+ LI,
+ LI8,
+ LIS,
+ LIS8,
+ MFCTR,
+ MFCTR8,
+ MFLR,
+ MFLR8,
+ MFOCRF,
+ MFOCRF8,
+ MFVRD,
+ MFVRWZ,
+ MFVSRD,
+ MFVSRWZ,
+ MTVRD,
+ MTVRWA,
+ MTVRWZ,
+ MTVSRBM,
+ MTVSRBMI,
+ MTVSRD,
+ MTVSRDM,
+ MTVSRHM,
+ MTVSRQM,
+ MTVSRWA,
+ MTVSRWM,
+ MTVSRWZ,
+ NAND,
+ NAND8,
+ NAND8_rec,
+ NAND_rec,
+ NEG,
+ NEG8,
+ NEG8O,
+ NEG8_rec,
+ NEGO,
+ NEG_rec,
+ NOP,
+ NOP_GT_PWR6,
+ NOP_GT_PWR7,
+ NOR,
+ NOR8,
+ NOR8_rec,
+ NOR_rec,
+ OR,
+ OR8,
+ OR8_rec,
+ ORC,
+ ORC8,
+ ORC8_rec,
+ ORC_rec,
+ ORI,
+ ORI8,
+ ORIS,
+ ORIS8,
+ OR_rec,
+ POPCNTB,
+ POPCNTB8,
+ POPCNTD,
+ POPCNTW,
+ RLDCL,
+ RLDCL_rec,
+ RLDCR,
+ RLDCR_rec,
+ RLDIC,
+ RLDICL,
+ RLDICL_32,
+ RLDICL_32_64,
+ RLDICL_32_rec,
+ RLDICL_rec,
+ RLDICR,
+ RLDICR_32,
+ RLDICR_rec,
+ RLDIC_rec,
+ RLDIMI,
+ RLDIMI_rec,
+ RLWIMI,
+ RLWIMI8,
+ RLWIMI8_rec,
+ RLWIMI_rec,
+ RLWINM,
+ RLWINM8,
+ RLWINM8_rec,
+ RLWINM_rec,
+ RLWNM,
+ RLWNM8,
+ RLWNM8_rec,
+ RLWNM_rec,
+ SETB,
+ SETB8,
+ SETBC,
+ SETBC8,
+ SETBCR,
+ SETBCR8,
+ SETNBC,
+ SETNBC8,
+ SETNBCR,
+ SETNBCR8,
+ SLD,
+ SLD_rec,
+ SLW,
+ SLW8,
+ SLW8_rec,
+ SLW_rec,
+ SRAD,
+ SRADI,
+ SRADI_32,
+ SRAW,
+ SRAWI,
+ SRD,
+ SRD_rec,
+ SRW,
+ SRW8,
+ SRW8_rec,
+ SRW_rec,
+ SUBF,
+ SUBF8,
+ SUBF8O,
+ SUBF8_rec,
+ SUBFE,
+ SUBFE8,
+ SUBFE8O,
+ SUBFEO,
+ SUBFIC,
+ SUBFIC8,
+ SUBFME,
+ SUBFME8,
+ SUBFME8O,
+ SUBFMEO,
+ SUBFO,
+ SUBFZE,
+ SUBFZE8,
+ SUBFZE8O,
+ SUBFZEO,
+ SUBF_rec,
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDCUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBM,
+ VADDUBS,
+ VADDUDM,
+ VADDUHM,
+ VADDUHS,
+ VADDUWM,
+ VADDUWS,
+ VAND,
+ VANDC,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCMPBFP,
+ VCMPBFP_rec,
+ VCMPEQFP,
+ VCMPEQFP_rec,
+ VCMPEQUB,
+ VCMPEQUB_rec,
+ VCMPEQUD,
+ VCMPEQUD_rec,
+ VCMPEQUH,
+ VCMPEQUH_rec,
+ VCMPEQUQ,
+ VCMPEQUQ_rec,
+ VCMPEQUW,
+ VCMPEQUW_rec,
+ VCMPGEFP,
+ VCMPGEFP_rec,
+ VCMPGTFP,
+ VCMPGTFP_rec,
+ VCMPGTSB,
+ VCMPGTSB_rec,
+ VCMPGTSD,
+ VCMPGTSD_rec,
+ VCMPGTSH,
+ VCMPGTSH_rec,
+ VCMPGTSQ,
+ VCMPGTSQ_rec,
+ VCMPGTSW,
+ VCMPGTSW_rec,
+ VCMPGTUB,
+ VCMPGTUB_rec,
+ VCMPGTUD,
+ VCMPGTUD_rec,
+ VCMPGTUH,
+ VCMPGTUH_rec,
+ VCMPGTUQ,
+ VCMPGTUQ_rec,
+ VCMPGTUW,
+ VCMPGTUW_rec,
+ VCMPNEB,
+ VCMPNEB_rec,
+ VCMPNEH,
+ VCMPNEH_rec,
+ VCMPNEW,
+ VCMPNEW_rec,
+ VCMPNEZB,
+ VCMPNEZB_rec,
+ VCMPNEZH,
+ VCMPNEZH_rec,
+ VCMPNEZW,
+ VCMPNEZW_rec,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VEQV,
+ VEXPANDBM,
+ VEXPANDDM,
+ VEXPANDHM,
+ VEXPANDQM,
+ VEXPANDWM,
+ VEXTRACTBM,
+ VEXTRACTDM,
+ VEXTRACTHM,
+ VEXTRACTQM,
+ VEXTRACTWM,
+ VEXTSB2D,
+ VEXTSB2Ds,
+ VEXTSB2W,
+ VEXTSB2Ws,
+ VEXTSD2Q,
+ VEXTSH2D,
+ VEXTSH2Ds,
+ VEXTSH2W,
+ VEXTSH2Ws,
+ VEXTSW2D,
+ VEXTSW2Ds,
+ VMAXFP,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINFP,
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNEGD,
+ VNEGW,
+ VNOR,
+ VOR,
+ VORC,
+ VPOPCNTB,
+ VPOPCNTD,
+ VPOPCNTH,
+ VPOPCNTW,
+ VPRTYBD,
+ VPRTYBW,
+ VRLB,
+ VRLD,
+ VRLDMI,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWMI,
+ VRLWNM,
+ VSEL,
+ VSHASIGMAD,
+ VSHASIGMAW,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBCUW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBM,
+ VSUBUBS,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUHS,
+ VSUBUWM,
+ VSUBUWS,
+ VXOR,
+ V_SET0,
+ V_SET0B,
+ V_SET0H,
+ XOR,
+ XOR8,
+ XOR8_rec,
+ XORI,
+ XORI8,
+ XORIS,
+ XORIS8,
+ XOR_rec,
+ XSABSDP,
+ XSABSQP,
+ XSCMPEQDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCPSGNDP,
+ XSCPSGNQP,
+ XSCVHPDP,
+ XSCVSPDPN,
+ XSIEXPDP,
+ XSIEXPQP,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSNABSDP,
+ XSNABSQP,
+ XSNEGDP,
+ XSNEGQP,
+ XSXEXPDP,
+ XSXEXPQP,
+ XSXSIGDP,
+ XVABSDP,
+ XVABSSP,
+ XVCMPEQDP,
+ XVCMPEQDP_rec,
+ XVCMPEQSP,
+ XVCMPEQSP_rec,
+ XVCMPGEDP,
+ XVCMPGEDP_rec,
+ XVCMPGESP,
+ XVCMPGESP_rec,
+ XVCMPGTDP,
+ XVCMPGTDP_rec,
+ XVCMPGTSP,
+ XVCMPGTSP_rec,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVCVHPSP,
+ XVIEXPDP,
+ XVIEXPSP,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVTSTDCDP,
+ XVTSTDCSP,
+ XVXEXPDP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP,
+ XXLAND,
+ XXLANDC,
+ XXLEQV,
+ XXLEQVOnes,
+ XXLNAND,
+ XXLNOR,
+ XXLOR,
+ XXLORC,
+ XXLORf,
+ XXLXOR,
+ XXLXORdpz,
+ XXLXORspz,
+ XXLXORz,
+ XXSEL),
+ FUSION_OP_SET(ADD4,
+ ADD4O,
+ ADD4TLS,
+ ADD4_rec,
+ ADD8,
+ ADD8O,
+ ADD8TLS,
+ ADD8TLS_,
+ ADD8_rec,
+ ADDE,
+ ADDE8,
+ ADDE8O,
+ ADDEO,
+ ADDEX,
+ ADDEX8,
+ ADDI,
+ ADDI8,
+ ADDIC,
+ ADDIC8,
+ ADDIS,
+ ADDIS8,
+ ADDISdtprelHA32,
+ ADDIStocHA,
+ ADDIStocHA8,
+ ADDIdtprelL32,
+ ADDItlsldLADDR32,
+ ADDItocL,
+ ADDME,
+ ADDME8,
+ ADDME8O,
+ ADDMEO,
+ ADDZE,
+ ADDZE8,
+ ADDZE8O,
+ ADDZEO,
+ AND,
+ AND8,
+ AND8_rec,
+ ANDC,
+ ANDC8,
+ ANDC8_rec,
+ ANDC_rec,
+ ANDI8_rec,
+ ANDIS8_rec,
+ ANDIS_rec,
+ ANDI_rec,
+ AND_rec,
+ CMPB,
+ CMPB8,
+ CMPD,
+ CMPDI,
+ CMPEQB,
+ CMPLD,
+ CMPLDI,
+ CMPLW,
+ CMPLWI,
+ CMPRB,
+ CMPRB8,
+ CMPW,
+ CMPWI,
+ CNTLZD,
+ CNTLZD_rec,
+ CNTLZW,
+ CNTLZW8,
+ CNTLZW8_rec,
+ CNTLZW_rec,
+ CNTTZD,
+ CNTTZD_rec,
+ CNTTZW,
+ CNTTZW8,
+ CNTTZW8_rec,
+ CNTTZW_rec,
+ CR6SET,
+ CR6UNSET,
+ CRAND,
+ CRANDC,
+ CREQV,
+ CRNAND,
+ CRNOR,
+ CROR,
+ CRORC,
+ CRSET,
+ CRUNSET,
+ CRXOR,
+ DSS,
+ DSSALL,
+ DST,
+ DST64,
+ DSTST,
+ DSTST64,
+ DSTSTT,
+ DSTSTT64,
+ DSTT,
+ DSTT64,
+ EQV,
+ EQV8,
+ EQV8_rec,
+ EQV_rec,
+ EXTSB,
+ EXTSB8,
+ EXTSB8_32_64,
+ EXTSB8_rec,
+ EXTSB_rec,
+ EXTSH,
+ EXTSH8,
+ EXTSH8_32_64,
+ EXTSH8_rec,
+ EXTSH_rec,
+ EXTSW,
+ EXTSWSLI,
+ EXTSWSLI_32_64,
+ EXTSWSLI_32_64_rec,
+ EXTSWSLI_rec,
+ EXTSW_32,
+ EXTSW_32_64,
+ EXTSW_32_64_rec,
+ EXTSW_rec,
+ FABSD,
+ FABSS,
+ FCMPOD,
+ FCMPOS,
+ FCMPUD,
+ FCMPUS,
+ FCPSGND,
+ FCPSGNS,
+ FMR,
+ FNABSD,
+ FNABSS,
+ FNEGD,
+ FNEGS,
+ FTDIV,
+ FTSQRT,
+ ISEL,
+ ISEL8,
+ LI,
+ LI8,
+ LIS,
+ LIS8,
+ MCRF,
+ MCRXRX,
+ MFCTR,
+ MFCTR8,
+ MFLR,
+ MFLR8,
+ MFOCRF,
+ MFOCRF8,
+ MFVRD,
+ MFVRWZ,
+ MFVSRD,
+ MFVSRWZ,
+ MTCTR,
+ MTCTR8,
+ MTCTR8loop,
+ MTCTRloop,
+ MTLR,
+ MTLR8,
+ MTOCRF,
+ MTOCRF8,
+ MTVRD,
+ MTVRWA,
+ MTVRWZ,
+ MTVSRBM,
+ MTVSRBMI,
+ MTVSRD,
+ MTVSRDM,
+ MTVSRHM,
+ MTVSRQM,
+ MTVSRWA,
+ MTVSRWM,
+ MTVSRWZ,
+ NAND,
+ NAND8,
+ NAND8_rec,
+ NAND_rec,
+ NEG,
+ NEG8,
+ NEG8O,
+ NEG8_rec,
+ NEGO,
+ NEG_rec,
+ NOP,
+ NOP_GT_PWR6,
+ NOP_GT_PWR7,
+ NOR,
+ NOR8,
+ NOR8_rec,
+ NOR_rec,
+ OR,
+ OR8,
+ OR8_rec,
+ ORC,
+ ORC8,
+ ORC8_rec,
+ ORC_rec,
+ ORI,
+ ORI8,
+ ORIS,
+ ORIS8,
+ OR_rec,
+ POPCNTB,
+ POPCNTB8,
+ POPCNTD,
+ POPCNTW,
+ RLDCL,
+ RLDCL_rec,
+ RLDCR,
+ RLDCR_rec,
+ RLDIC,
+ RLDICL,
+ RLDICL_32,
+ RLDICL_32_64,
+ RLDICL_32_rec,
+ RLDICL_rec,
+ RLDICR,
+ RLDICR_32,
+ RLDICR_rec,
+ RLDIC_rec,
+ RLDIMI,
+ RLDIMI_rec,
+ RLWIMI,
+ RLWIMI8,
+ RLWIMI8_rec,
+ RLWIMI_rec,
+ RLWINM,
+ RLWINM8,
+ RLWINM8_rec,
+ RLWINM_rec,
+ RLWNM,
+ RLWNM8,
+ RLWNM8_rec,
+ RLWNM_rec,
+ SETB,
+ SETB8,
+ SETBC,
+ SETBC8,
+ SETBCR,
+ SETBCR8,
+ SETNBC,
+ SETNBC8,
+ SETNBCR,
+ SETNBCR8,
+ SLD,
+ SLD_rec,
+ SLW,
+ SLW8,
+ SLW8_rec,
+ SLW_rec,
+ SRAD,
+ SRADI,
+ SRADI_32,
+ SRAW,
+ SRAWI,
+ SRD,
+ SRD_rec,
+ SRW,
+ SRW8,
+ SRW8_rec,
+ SRW_rec,
+ SUBF,
+ SUBF8,
+ SUBF8O,
+ SUBF8_rec,
+ SUBFE,
+ SUBFE8,
+ SUBFE8O,
+ SUBFEO,
+ SUBFIC,
+ SUBFIC8,
+ SUBFME,
+ SUBFME8,
+ SUBFME8O,
+ SUBFMEO,
+ SUBFO,
+ SUBFZE,
+ SUBFZE8,
+ SUBFZE8O,
+ SUBFZEO,
+ SUBF_rec,
+ TD,
+ TDI,
+ TRAP,
+ TW,
+ TWI,
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDCUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBM,
+ VADDUBS,
+ VADDUDM,
+ VADDUHM,
+ VADDUHS,
+ VADDUWM,
+ VADDUWS,
+ VAND,
+ VANDC,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCMPBFP,
+ VCMPBFP_rec,
+ VCMPEQFP,
+ VCMPEQFP_rec,
+ VCMPEQUB,
+ VCMPEQUB_rec,
+ VCMPEQUD,
+ VCMPEQUD_rec,
+ VCMPEQUH,
+ VCMPEQUH_rec,
+ VCMPEQUQ,
+ VCMPEQUQ_rec,
+ VCMPEQUW,
+ VCMPEQUW_rec,
+ VCMPGEFP,
+ VCMPGEFP_rec,
+ VCMPGTFP,
+ VCMPGTFP_rec,
+ VCMPGTSB,
+ VCMPGTSB_rec,
+ VCMPGTSD,
+ VCMPGTSD_rec,
+ VCMPGTSH,
+ VCMPGTSH_rec,
+ VCMPGTSQ,
+ VCMPGTSQ_rec,
+ VCMPGTSW,
+ VCMPGTSW_rec,
+ VCMPGTUB,
+ VCMPGTUB_rec,
+ VCMPGTUD,
+ VCMPGTUD_rec,
+ VCMPGTUH,
+ VCMPGTUH_rec,
+ VCMPGTUQ,
+ VCMPGTUQ_rec,
+ VCMPGTUW,
+ VCMPGTUW_rec,
+ VCMPNEB,
+ VCMPNEB_rec,
+ VCMPNEH,
+ VCMPNEH_rec,
+ VCMPNEW,
+ VCMPNEW_rec,
+ VCMPNEZB,
+ VCMPNEZB_rec,
+ VCMPNEZH,
+ VCMPNEZH_rec,
+ VCMPNEZW,
+ VCMPNEZW_rec,
+ VCMPSQ,
+ VCMPUQ,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VEQV,
+ VEXPANDBM,
+ VEXPANDDM,
+ VEXPANDHM,
+ VEXPANDQM,
+ VEXPANDWM,
+ VEXTRACTBM,
+ VEXTRACTDM,
+ VEXTRACTHM,
+ VEXTRACTQM,
+ VEXTRACTWM,
+ VEXTSB2D,
+ VEXTSB2Ds,
+ VEXTSB2W,
+ VEXTSB2Ws,
+ VEXTSD2Q,
+ VEXTSH2D,
+ VEXTSH2Ds,
+ VEXTSH2W,
+ VEXTSH2Ws,
+ VEXTSW2D,
+ VEXTSW2Ds,
+ VMAXFP,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINFP,
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNEGD,
+ VNEGW,
+ VNOR,
+ VOR,
+ VORC,
+ VPOPCNTB,
+ VPOPCNTD,
+ VPOPCNTH,
+ VPOPCNTW,
+ VPRTYBD,
+ VPRTYBW,
+ VRLB,
+ VRLD,
+ VRLDMI,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWMI,
+ VRLWNM,
+ VSEL,
+ VSHASIGMAD,
+ VSHASIGMAW,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBCUW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBM,
+ VSUBUBS,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUHS,
+ VSUBUWM,
+ VSUBUWS,
+ VXOR,
+ V_SET0,
+ V_SET0B,
+ V_SET0H,
+ WAIT,
+ XOR,
+ XOR8,
+ XOR8_rec,
+ XORI,
+ XORI8,
+ XORIS,
+ XORIS8,
+ XOR_rec,
+ XSABSDP,
+ XSABSQP,
+ XSCMPEQDP,
+ XSCMPEXPDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCMPODP,
+ XSCMPUDP,
+ XSCPSGNDP,
+ XSCPSGNQP,
+ XSCVHPDP,
+ XSCVSPDPN,
+ XSIEXPDP,
+ XSIEXPQP,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSNABSDP,
+ XSNABSQP,
+ XSNEGDP,
+ XSNEGQP,
+ XSTDIVDP,
+ XSTSQRTDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
+ XSXEXPDP,
+ XSXEXPQP,
+ XSXSIGDP,
+ XVABSDP,
+ XVABSSP,
+ XVCMPEQDP,
+ XVCMPEQDP_rec,
+ XVCMPEQSP,
+ XVCMPEQSP_rec,
+ XVCMPGEDP,
+ XVCMPGEDP_rec,
+ XVCMPGESP,
+ XVCMPGESP_rec,
+ XVCMPGTDP,
+ XVCMPGTDP_rec,
+ XVCMPGTSP,
+ XVCMPGTSP_rec,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVCVHPSP,
+ XVIEXPDP,
+ XVIEXPSP,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVTDIVDP,
+ XVTDIVSP,
+ XVTLSBB,
+ XVTSQRTDP,
+ XVTSQRTSP,
+ XVTSTDCDP,
+ XVTSTDCSP,
+ XVXEXPDP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP,
+ XXLAND,
+ XXLANDC,
+ XXLEQV,
+ XXLEQVOnes,
+ XXLNAND,
+ XXLNOR,
+ XXLOR,
+ XXLORC,
+ XXLORf,
+ XXLXOR,
+ XXLXORdpz,
+ XXLXORspz,
+ XXLXORz,
+ XXSEL)) \ No newline at end of file
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a2664bcff4ab..ba74af5ef5f7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4464,9 +4464,10 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
+ MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
SDValue AddrOp;
- if (LDN)
- AddrOp = LDN->getOperand(1);
+ if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
+ AddrOp = N->getOperand(1);
else if (STN)
AddrOp = STN->getOperand(2);
@@ -5973,6 +5974,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (Type != MVT::v16i8 && Type != MVT::v8i16)
break;
+ // If the alignment for the load is 16 or bigger, we don't need the
+ // permutated mask to get the required value. The value must be the 0
+ // element in big endian target or 7/15 in little endian target in the
+ // result vsx register of lvx instruction.
+ // Select the instruction in the .td file.
+ if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
+ isOffsetMultipleOf(N, 16))
+ break;
+
SDValue ZeroReg =
CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ec7e30d7e362..8d6edf07bc53 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (LHS.getValueType() == MVT::v2i64) {
// Equality can be handled by casting to the legal type for Altivec
// comparisons, everything else needs to be expanded.
- if (CC == ISD::SETEQ || CC == ISD::SETNE) {
- return DAG.getNode(
- ISD::BITCAST, dl, MVT::v2i64,
- DAG.getSetCC(dl, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC));
- }
-
- return SDValue();
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return SDValue();
+ SDValue SetCC32 = DAG.getSetCC(
+ dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
+ int ShuffV[] = {1, 0, 3, 2};
+ SDValue Shuff =
+ DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
+ return DAG.getBitcast(
+ MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32));
}
// We handle most of these in the usual way.
@@ -6206,20 +6207,13 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
ArgOffset += PtrByteSize;
continue;
}
- // Copy entire object into memory. There are cases where gcc-generated
- // code assumes it is there, even if it could be put entirely into
- // registers. (This is not what the doc says.)
-
- // FIXME: The above statement is likely due to a misunderstanding of the
- // documents. All arguments must be copied into the parameter area BY
- // THE CALLEE in the event that the callee takes the address of any
- // formal argument. That has not yet been implemented. However, it is
- // reasonable to use the stack area as a staging area for the register
- // load.
-
- // Skip this for small aggregates, as we will use the same slot for a
- // right-justified copy, below.
- if (Size >= 8)
+      // Copy the object to parameter save area if it cannot be entirely passed
+ // by registers.
+ // FIXME: we only need to copy the parts which need to be passed in
+ // parameter save area. For the parts passed by registers, we don't need
+ // to copy them to the stack although we need to allocate space for them
+ // in parameter save area.
+ if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
CallSeqStart,
Flags, DAG, dl);
@@ -17548,14 +17542,14 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
(ParentOp == ISD::INTRINSIC_VOID))) {
unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
- assert(
- ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) &&
- "Only the paired load and store (lxvp/stxvp) intrinsics are valid.");
- SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? Parent->getOperand(2)
- : Parent->getOperand(3);
- computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
- FlagSet |= PPC::MOF_Vector;
- return FlagSet;
+ if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
+ SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
+ ? Parent->getOperand(2)
+ : Parent->getOperand(3);
+ computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
+ FlagSet |= PPC::MOF_Vector;
+ return FlagSet;
+ }
}
// Mark this as something we don't want to handle here if it is atomic
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 2cfd53de3290..c16e146da247 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -393,7 +393,9 @@ public:
MachineInstr &NewMI1,
MachineInstr &NewMI2) const override;
- void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const override;
+ // PowerPC specific version of setSpecialOperandAttr that copies Flags to MI
+ // and clears nuw, nsw, and exact flags.
+ void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const;
bool isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index d83ecc699b19..2340be5b5915 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -4780,6 +4780,7 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"hwsync", (SYNC 0), 0>, Requires<[HasSYNC]>;
def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>;
def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d92a10c5b208..110f7d79fbc5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -158,6 +158,11 @@ def HasP9Vector : Predicate<"Subtarget->hasP9Vector()">;
def NoP9Altivec : Predicate<"!Subtarget->hasP9Altivec()">;
def NoP10Vector: Predicate<"!Subtarget->hasP10Vector()">;
+def PPCldsplatAlign16 : PatFrag<(ops node:$ptr), (PPCldsplat node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
+ isOffsetMultipleOf(N, 16);
+}]>;
+
//--------------------- VSX-specific instruction formats ---------------------//
// By default, all VSX instructions are to be selected over their Altivec
// counter parts and they do not have unmodeled sideeffects.
@@ -3180,6 +3185,12 @@ defm : ScalToVecWPermute<
v2f64, (f64 (load ForceXForm:$src)),
(XXPERMDIs (XFLOADf64 ForceXForm:$src), 2),
(SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>;
+
+// Splat loads.
+def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v8i16 (VSPLTH 7, (LVX ForceXForm:$A)))>;
+def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v16i8 (VSPLTB 15, (LVX ForceXForm:$A)))>;
} // HasVSX, NoP9Vector, IsLittleEndian
let Predicates = [HasVSX, NoP9Vector, IsBigEndian] in {
@@ -3187,6 +3198,12 @@ let Predicates = [HasVSX, NoP9Vector, IsBigEndian] in {
(LXVD2X ForceXForm:$src)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, ForceXForm:$dst),
(STXVD2X $rS, ForceXForm:$dst)>;
+
+ // Splat loads.
+ def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v8i16 (VSPLTH 0, (LVX ForceXForm:$A)))>;
+ def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v16i8 (VSPLTB 0, (LVX ForceXForm:$A)))>;
} // HasVSX, NoP9Vector, IsBigEndian
// Any VSX subtarget that only has loads and stores that load in big endian
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 7f63827afbd6..0c7be96a0595 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -413,9 +413,9 @@ bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
bool MadeChange = false;
- for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
- for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
- MadeChange |= runOnLoop(*L);
+ for (Loop *I : *LI)
+ for (Loop *L : depth_first(I))
+ MadeChange |= runOnLoop(L);
return MadeChange;
}
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index e4954b722fd0..6b8ad22639c8 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -153,5 +153,7 @@ FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1,
FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR),
FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL))
+#include "PPCBack2BackFusion.def"
+
#undef FUSION_FEATURE
#undef FUSION_OP_SET
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 1258a1281597..f11b4e14073e 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -135,6 +135,7 @@ void PPCSubtarget::initializeEnvironment() {
HasCompareFusion = false;
HasWideImmFusion = false;
HasZeroMoveFusion = false;
+ HasBack2BackFusion = false;
IsISA2_06 = false;
IsISA2_07 = false;
IsISA3_0 = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index d52833cb1465..1300b62b623a 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -155,6 +155,7 @@ protected:
bool HasCompareFusion;
bool HasWideImmFusion;
bool HasZeroMoveFusion;
+ bool HasBack2BackFusion;
bool IsISA2_06;
bool IsISA2_07;
bool IsISA3_0;
@@ -348,6 +349,7 @@ public:
bool hasWideImmFusion() const { return HasWideImmFusion; }
bool hasSha3Fusion() const { return HasSha3Fusion; }
bool hasZeroMoveFusion() const { return HasZeroMoveFusion; }
+ bool hasBack2BackFusion() const { return HasBack2BackFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5d6f58a77a39..ed28731b8ef2 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -328,10 +328,6 @@ static bool isMMAType(Type *Ty) {
InstructionCost PPCTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
- // Set the max cost if an MMA type is present (v256i1, v512i1).
- if (isMMAType(U->getType()))
- return InstructionCost::getMax();
-
// We already implement getCastInstrCost and getMemoryOpCost where we perform
// the vector adjustment there.
if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -1276,23 +1272,21 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
-bool PPCTTIImpl::areFunctionArgsABICompatible(
- const Function *Caller, const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const {
+bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
+ const Function *Callee,
+ const ArrayRef<Type *> &Types) const {
// We need to ensure that argument promotion does not
// attempt to promote pointers to MMA types (__vector_pair
// and __vector_quad) since these types explicitly cannot be
// passed as arguments. Both of these types are larger than
// the 128-bit Altivec vectors and have a scalar size of 1 bit.
- if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+ if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
- return llvm::none_of(Args, [](Argument *A) {
- auto *EltTy = cast<PointerType>(A->getType())->getElementType();
- if (EltTy->isSized())
- return (EltTy->isIntOrIntVectorTy(1) &&
- EltTy->getPrimitiveSizeInBits() > 128);
+ return llvm::none_of(Types, [](Type *Ty) {
+ if (Ty->isSized())
+ return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
return false;
});
}
@@ -1388,3 +1382,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return false;
}
+
+bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const {
+ // Only load and stores instructions can have variable vector length on Power.
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return false;
+ // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+ // therefore cannot be used in 32-bit mode.
+ if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+ return false;
+ if (isa<FixedVectorType>(DataType)) {
+ unsigned VecWidth = DataType->getPrimitiveSizeInBits();
+ return VecWidth == 128;
+ }
+ Type *ScalarTy = DataType->getScalarType();
+
+ if (ScalarTy->isPointerTy())
+ return true;
+
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+ return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+ AddressSpace, CostKind, I);
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return Cost;
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
+
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+ assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+ if (hasActiveVectorLength(Opcode, Src, Alignment)) {
+ std::pair<InstructionCost, MVT> LT =
+ TLI->getTypeLegalizationCost(DL, SrcVTy);
+
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
+ InstructionCost Cost = LT.first * CostFactor;
+ assert(Cost.isValid() && "Expected valid cost");
+
+ // On P9 but not on P10, if the op is misaligned then it will cause a
+ // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
+ // ones.
+ const Align DesiredAlignment(16);
+ if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
+ return Cost;
+
+ // Since alignment may be under estimated, we try to compute the probability
+ // that the actual address is aligned to the desired boundary. For example
+ // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
+ // time, while a 4-byte aligned load has a 25% chance of being 16-byte
+ // aligned.
+ float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
+ float MisalignmentProb = 1.0 - AlignmentProb;
+ return (MisalignmentProb * P9PipelineFlushEstimate) +
+ (AlignmentProb * *Cost.getValue());
+ }
+
+ // Usually we should not get to this point, but the following is an attempt to
+ // model the cost of legalization. Currently we can only lower intrinsics with
+ // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
+ return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 7aeb0c59d503..0af6f2a308d9 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -134,9 +134,19 @@ public:
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
- bool areFunctionArgsABICompatible(const Function *Caller,
- const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const;
+ bool areTypesABICompatible(const Function *Caller, const Function *Callee,
+ const ArrayRef<Type *> &Types) const;
+ bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const;
+ InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
+private:
+ // The following constant is used for estimating costs on power9.
+ static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
+
/// @}
};
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index f00813f1301a..75592dd4c6f5 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -169,6 +169,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseJALOffset(OperandVector &Operands);
OperandMatchResultTy parseVTypeI(OperandVector &Operands);
OperandMatchResultTy parseMaskReg(OperandVector &Operands);
+ OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -827,6 +828,7 @@ public:
Op->SysReg.Length = Str.size();
Op->SysReg.Encoding = Encoding;
Op->StartLoc = S;
+ Op->EndLoc = S;
Op->IsRV64 = IsRV64;
return Op;
}
@@ -836,6 +838,7 @@ public:
auto Op = std::make_unique<RISCVOperand>(KindTy::VType);
Op->VType.Val = VTypeI;
Op->StartLoc = S;
+ Op->EndLoc = S;
Op->IsRV64 = IsRV64;
return Op;
}
@@ -1291,7 +1294,7 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
if (HadParens)
Operands.push_back(RISCVOperand::createToken("(", FirstS, isRV64()));
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
getLexer().Lex();
Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64()));
}
@@ -1305,6 +1308,67 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
}
OperandMatchResultTy
+RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E;
+ const MCExpr *Res;
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
+ case AsmToken::Integer:
+ case AsmToken::String: {
+ if (getParser().parseExpression(Res, E))
+ return MatchOperand_ParseFail;
+
+ auto *CE = dyn_cast<MCConstantExpr>(Res);
+ if (CE) {
+ int64_t Imm = CE->getValue();
+ if (isUInt<7>(Imm)) {
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+ }
+ }
+
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
+ case AsmToken::Identifier: {
+ StringRef Identifier;
+ if (getParser().parseIdentifier(Identifier))
+ return MatchOperand_ParseFail;
+
+ auto Opcode = RISCVInsnOpcode::lookupRISCVOpcodeByName(Identifier);
+ if (Opcode) {
+ Res = MCConstantExpr::create(Opcode->Value, getContext());
+ E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+ }
+
+ Twine Msg = "operand must be a valid opcode name or an "
+ "integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
+ case AsmToken::Percent: {
+ // Discard operand with modifier.
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ return MatchOperand_NoMatch;
+}
+
+OperandMatchResultTy
RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
@@ -1381,7 +1445,7 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ SMLoc E;
const MCExpr *Res;
switch (getLexer().getKind()) {
@@ -1396,7 +1460,7 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
case AsmToken::Integer:
case AsmToken::String:
case AsmToken::Identifier:
- if (getParser().parseExpression(Res))
+ if (getParser().parseExpression(Res, E))
return MatchOperand_ParseFail;
break;
case AsmToken::Percent:
@@ -1410,7 +1474,7 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
OperandMatchResultTy
RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ SMLoc E;
if (getLexer().getKind() != AsmToken::Percent) {
Error(getLoc(), "expected '%' for operand modifier");
@@ -1449,7 +1513,6 @@ RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
@@ -1461,6 +1524,8 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
+
if (Identifier.consume_back("@plt")) {
Error(getLoc(), "'@plt' operand not valid for instruction");
return MatchOperand_ParseFail;
@@ -1492,7 +1557,7 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
}
const MCExpr *Expr;
- if (getParser().parseExpression(Expr))
+ if (getParser().parseExpression(Expr, E))
return MatchOperand_ParseFail;
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
@@ -1501,7 +1566,6 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
@@ -1515,6 +1579,8 @@ OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
+
RISCVMCExpr::VariantKind Kind = RISCVMCExpr::VK_RISCV_CALL;
if (Identifier.consume_back("@plt"))
Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
@@ -1529,10 +1595,10 @@ OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
OperandMatchResultTy
RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ SMLoc E;
const MCExpr *Res;
- if (getParser().parseExpression(Res))
+ if (getParser().parseExpression(Res, E))
return MatchOperand_ParseFail;
if (Res->getKind() != MCExpr::ExprKind::SymbolRef ||
@@ -1662,7 +1728,7 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) {
if (RegNo != RISCV::V0)
return MatchOperand_NoMatch;
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
getLexer().Lex();
Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64()));
}
@@ -2062,7 +2128,11 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
"unexpected token in '.attribute' directive"))
return true;
- if (Tag == RISCVAttrs::ARCH) {
+ if (IsIntegerValue)
+ getTargetStreamer().emitAttribute(Tag, IntegerValue);
+ else if (Tag != RISCVAttrs::ARCH)
+ getTargetStreamer().emitTextAttribute(Tag, StringValue);
+ else {
StringRef Arch = StringValue;
for (auto Feature : RISCVFeatureKV)
if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
@@ -2070,7 +2140,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
auto ParseResult = llvm::RISCVISAInfo::parseArchString(
StringValue, /*EnableExperimentalExtension=*/true,
- /*ExperimentalExtensionVersionCheck=*/false);
+ /*ExperimentalExtensionVersionCheck=*/true);
if (!ParseResult) {
std::string Buffer;
raw_string_ostream OutputErrMsg(Buffer);
@@ -2093,35 +2163,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
setFeatureBits(RISCV::Feature64Bit, "64bit");
else
return Error(ValueExprLoc, "bad arch string " + Arch);
- }
-
- if (IsIntegerValue)
- getTargetStreamer().emitAttribute(Tag, IntegerValue);
- else {
- if (Tag != RISCVAttrs::ARCH) {
- getTargetStreamer().emitTextAttribute(Tag, StringValue);
- } else {
- std::vector<std::string> FeatureVector;
- RISCVFeatures::toFeatureVector(FeatureVector, getSTI().getFeatureBits());
- // Parse that by RISCVISAInfo->
- unsigned XLen = getFeatureBits(RISCV::Feature64Bit) ? 64 : 32;
- auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector);
- if (!ParseResult) {
- std::string Buffer;
- raw_string_ostream OutputErrMsg(Buffer);
- handleAllErrors(ParseResult.takeError(),
- [&](llvm::StringError &ErrMsg) {
- OutputErrMsg << ErrMsg.getMessage();
- });
-
- return Error(ValueExprLoc, OutputErrMsg.str());
- }
- auto &ISAInfo = *ParseResult;
-
- // Then emit the arch string.
- getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString());
- }
+ // Then emit the arch string.
+ getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString());
}
return false;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 0aba18b20f0d..144e761f002d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -27,6 +27,11 @@ namespace RISCVSysReg {
#include "RISCVGenSearchableTables.inc"
} // namespace RISCVSysReg
+namespace RISCVInsnOpcode {
+#define GET_RISCVOpcodesList_IMPL
+#include "RISCVGenSearchableTables.inc"
+} // namespace RISCVInsnOpcode
+
namespace RISCVABI {
ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
StringRef ABIName) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index d8f4403c824f..9cfd36745f46 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -299,6 +299,16 @@ struct SysReg {
#include "RISCVGenSearchableTables.inc"
} // end namespace RISCVSysReg
+namespace RISCVInsnOpcode {
+struct RISCVOpcode {
+ const char *Name;
+ unsigned Value;
+};
+
+#define GET_RISCVOpcodesList_DECL
+#include "RISCVGenSearchableTables.inc"
+} // end namespace RISCVInsnOpcode
+
namespace RISCVABI {
enum ABI {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index f1c3810f4ee5..89a7d54f60f8 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -171,9 +171,9 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
// Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx,
- // or non-zero bits 8/9/10.
+ // or non-zero in bits 8 and above.
if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED ||
- RISCVVType::getSEW(Imm) > 64 || (Imm & 0x700) != 0) {
+ RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) {
O << Imm;
return;
}
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 772a4f8ecd53..6aa915c01929 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -168,14 +168,6 @@ def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">,
AssemblerPredicate<(all_of FeatureStdExtZvlsseg),
"'Zvlsseg' (Vector segment load/store instructions)">;
-def FeatureStdExtZvamo
- : SubtargetFeature<"experimental-zvamo", "HasStdExtZvamo", "true",
- "'Zvamo' (Vector AMO Operations)",
- [FeatureStdExtV]>;
-def HasStdExtZvamo : Predicate<"Subtarget->hasStdExtZvamo()">,
- AssemblerPredicate<(all_of FeatureStdExtZvamo),
- "'Zvamo' (Vector AMO Operations)">;
-
def Feature64Bit
: SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 66a34d73dd37..b24eb5f7bbf4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -718,6 +718,71 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
}
+ case ISD::MUL: {
+ // Special case for calculating (mul (and X, C2), C1) where the full product
+ // fits in XLen bits. We can shift X left by the number of leading zeros in
+ // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
+ // product has XLen trailing zeros, putting it in the output of MULHU. This
+ // can avoid materializing a constant in a register for C2.
+
+ // RHS should be a constant.
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C || !N1C->hasOneUse())
+ break;
+
+ // LHS should be an AND with constant.
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
+ break;
+
+ uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+
+ // Constant should be a mask.
+ if (!isMask_64(C2))
+ break;
+
+ // This should be the only use of the AND unless we will use
+ // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
+ // constants.
+ if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
+ break;
+
+ // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
+ // optimization.
+ if (isInt<12>(C2) ||
+ (C2 == UINT64_C(0xFFFF) &&
+ (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
+ (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
+ break;
+
+ // We need to shift left the AND input and C1 by a total of XLen bits.
+
+ // How far left do we need to shift the AND input?
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2));
+
+ // The constant gets shifted by the remaining amount unless that would
+ // shift bits out.
+ uint64_t C1 = N1C->getZExtValue();
+ unsigned ConstantShift = XLen - LeadingZeros;
+ if (ConstantShift > (XLen - (64 - countLeadingZeros(C1))))
+ break;
+
+ uint64_t ShiftedC1 = C1 << ConstantShift;
+ // If this RV32, we need to sign extend the constant.
+ if (XLen == 32)
+ ShiftedC1 = SignExtend64(ShiftedC1, 32);
+
+ // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
+ SDNode *Imm = selectImm(CurDAG, DL, ShiftedC1, *Subtarget);
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(LeadingZeros, DL, VT));
+ SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
+ SDValue(SLLI, 0), SDValue(Imm, 0));
+ ReplaceNode(Node, MULHU);
+ return;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
@@ -1450,6 +1515,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Extract.getNode());
return;
}
+ case ISD::SPLAT_VECTOR:
case RISCVISD::VMV_V_X_VL:
case RISCVISD::VFMV_V_F_VL: {
// Try to match splat of a scalar load to a strided load with stride of x0.
@@ -1466,7 +1532,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
SDValue VL;
- selectVLOp(Node->getOperand(1), VL);
+ if (Node->getOpcode() == ISD::SPLAT_VECTOR)
+ VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
+ else
+ selectVLOp(Node->getOperand(1), VL);
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f3331571fc55..4f5512e6fb37 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -330,6 +330,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f16, Legal);
setOperationAction(ISD::LROUND, MVT::f16, Legal);
setOperationAction(ISD::LLROUND, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
@@ -367,6 +375,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
setOperationAction(ISD::LROUND, MVT::f32, Legal);
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -388,6 +402,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
setOperationAction(ISD::LROUND, MVT::f64, Legal);
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
@@ -412,6 +434,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
+
setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
@@ -471,12 +498,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
- ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN};
+ ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
+ ISD::VP_SELECT};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX};
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -519,6 +547,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::VP_AND, VT, Custom);
+ setOperationAction(ISD::VP_OR, VT, Custom);
+ setOperationAction(ISD::VP_XOR, VT, Custom);
+
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
@@ -803,6 +835,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Operations below are different for between masks and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
+ setOperationAction(ISD::VP_AND, VT, Custom);
+ setOperationAction(ISD::VP_OR, VT, Custom);
+ setOperationAction(ISD::VP_XOR, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
@@ -1147,7 +1182,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasStdExtZbb();
}
-bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
+bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
EVT VT = Y.getValueType();
// FIXME: Support vectors once we have tests.
@@ -1235,7 +1270,8 @@ bool RISCVTargetLowering::shouldSinkOperands(
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
- if (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin())
+ // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
+ if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
return false;
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
return false;
@@ -1255,9 +1291,10 @@ bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled.
+ // Use f32 to pass f16 if it is legal and Zfh is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin())
+ // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
+ if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
return MVT::f32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -1266,9 +1303,10 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled.
+ // Use f32 to pass f16 if it is legal and Zfh is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin())
+ // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
+ if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -1959,29 +1997,37 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
int64_t StepNumerator = SimpleVID->StepNumerator;
unsigned StepDenominator = SimpleVID->StepDenominator;
int64_t Addend = SimpleVID->Addend;
+
+ assert(StepNumerator != 0 && "Invalid step");
+ bool Negate = false;
+ int64_t SplatStepVal = StepNumerator;
+ unsigned StepOpcode = ISD::MUL;
+ if (StepNumerator != 1) {
+ if (isPowerOf2_64(std::abs(StepNumerator))) {
+ Negate = StepNumerator < 0;
+ StepOpcode = ISD::SHL;
+ SplatStepVal = Log2_64(std::abs(StepNumerator));
+ }
+ }
+
// Only emit VIDs with suitably-small steps/addends. We use imm5 is a
// threshold since it's the immediate value many RVV instructions accept.
- if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
- isInt<5>(Addend)) {
+ // There is no vmul.vi instruction so ensure multiply constant can fit in
+ // a single addi instruction.
+ if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
+ (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
+ isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
// these, we'd lose the fixed-length vector info and generate worse
// vsetvli code.
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
- assert(StepNumerator != 0 && "Invalid step");
- bool Negate = false;
- if (StepNumerator != 1) {
- int64_t SplatStepVal = StepNumerator;
- unsigned Opcode = ISD::MUL;
- if (isPowerOf2_64(std::abs(StepNumerator))) {
- Negate = StepNumerator < 0;
- Opcode = ISD::SHL;
- SplatStepVal = Log2_64(std::abs(StepNumerator));
- }
+ if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
+ (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
- VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
+ VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
}
if (StepDenominator != 1) {
SDValue SplatStep = DAG.getSplatVector(
@@ -3133,6 +3179,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
return lowerSET_ROUNDING(Op, DAG);
+ case ISD::VP_SELECT:
+ return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
case ISD::VP_ADD:
return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
case ISD::VP_SUB:
@@ -3148,11 +3196,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_UREM:
return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
case ISD::VP_AND:
- return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
+ return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
case ISD::VP_OR:
- return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
+ return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
case ISD::VP_XOR:
- return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
+ return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
case ISD::VP_ASHR:
return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
case ISD::VP_LSHR:
@@ -4469,19 +4517,19 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
}
MVT M1VT = getLMUL1VT(ContainerVT);
+ MVT XLenVT = Subtarget.getXLenVT();
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- // FIXME: This is a VLMAX splat which might be too large and can prevent
- // vsetvli removal.
SDValue NeutralElem =
DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
- SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
+ SDValue IdentitySplat = lowerScalarSplat(
+ NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
IdentitySplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
- DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ DAG.getConstant(0, DL, XLenVT));
return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
@@ -4497,9 +4545,12 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
switch (Opcode) {
default:
llvm_unreachable("Unhandled reduction");
- case ISD::VECREDUCE_FADD:
- return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
- DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
+ case ISD::VECREDUCE_FADD: {
+ // Use positive zero if we can. It is cheaper to materialize.
+ SDValue Zero =
+ DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
+ return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
+ }
case ISD::VECREDUCE_SEQ_FADD:
return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
Op.getOperand(0));
@@ -4530,17 +4581,17 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
}
MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
+ MVT XLenVT = Subtarget.getXLenVT();
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- // FIXME: This is a VLMAX splat which might be too large and can prevent
- // vsetvli removal.
- SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
+ SDValue ScalarSplat = lowerScalarSplat(
+ ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
VectorVal, ScalarSplat, Mask, VL);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
- DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ DAG.getConstant(0, DL, XLenVT));
}
static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
@@ -4602,13 +4653,13 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
- // FIXME: This is a VLMAX splat which might be too large and can prevent
- // vsetvli removal.
- SDValue StartSplat = DAG.getSplatVector(M1VT, DL, Op.getOperand(0));
+ SDValue StartSplat =
+ lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT,
+ DL, DAG, Subtarget);
SDValue Reduction =
DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
- DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ DAG.getConstant(0, DL, XLenVT));
if (!VecVT.isInteger())
return Elt0;
return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
@@ -5365,6 +5416,33 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
+SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
+ unsigned MaskOpc,
+ unsigned VecOpc) const {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getVectorElementType() != MVT::i1)
+ return lowerVPOp(Op, DAG, VecOpc);
+
+ // It is safe to drop mask parameter as masked-off elements are undef.
+ SDValue Op1 = Op->getOperand(0);
+ SDValue Op2 = Op->getOperand(1);
+ SDValue VL = Op->getOperand(3);
+
+ MVT ContainerVT = VT;
+ const bool IsFixed = VT.isFixedLengthVector();
+ if (IsFixed) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
+ }
+
+ SDLoc DL(Op);
+ SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
+ if (!IsFixed)
+ return Val;
+ return convertFromScalableVector(VT, Val, DAG, Subtarget);
+}
+
// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to a RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
@@ -5695,11 +5773,17 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
TargetLowering::TypeSoftenFloat) {
- // FIXME: Support strict FP.
- if (IsStrict)
- return;
if (!isTypeLegal(Op0.getValueType()))
return;
+ if (IsStrict) {
+ unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RTZ_RV64
+ : RISCVISD::STRICT_FCVT_WU_RTZ_RV64;
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
+ SDValue Res = DAG.getNode(Opc, DL, VTs, N->getOperand(0), Op0);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ Results.push_back(Res.getValue(1));
+ return;
+ }
unsigned Opc =
IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0);
@@ -7026,7 +7110,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
return SDValue(N, 0);
- return combineGREVI_GORCI(N, DCI.DAG);
+ return combineGREVI_GORCI(N, DAG);
}
case RISCVISD::GREVW:
case RISCVISD::GORCW: {
@@ -7035,7 +7119,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
- return combineGREVI_GORCI(N, DCI.DAG);
+ return combineGREVI_GORCI(N, DAG);
}
case RISCVISD::SHFL:
case RISCVISD::UNSHFL: {
@@ -7120,11 +7204,23 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
// type legalization. This is safe because fp_to_uint produces poison if
// it overflows.
- if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() &&
- N->getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
- isTypeLegal(N->getOperand(0).getOperand(0).getValueType()))
- return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
- N->getOperand(0).getOperand(0));
+ if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() == ISD::FP_TO_UINT &&
+ isTypeLegal(Src.getOperand(0).getValueType()))
+ return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
+ Src.getOperand(0));
+ if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
+ isTypeLegal(Src.getOperand(1).getValueType())) {
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
+ Src.getOperand(0), Src.getOperand(1));
+ DCI.CombineTo(N, Res);
+ DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(Src.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked.
+ }
+ }
return SDValue();
case RISCVISD::SELECT_CC: {
// Transform
@@ -7685,6 +7781,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::BDECOMPRESSW:
case RISCVISD::FCVT_W_RTZ_RV64:
case RISCVISD::FCVT_WU_RTZ_RV64:
+ case RISCVISD::STRICT_FCVT_W_RTZ_RV64:
+ case RISCVISD::STRICT_FCVT_WU_RTZ_RV64:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
@@ -8004,6 +8102,22 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
}
+void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const {
+ // Add FRM dependency to any instructions with dynamic rounding mode.
+ unsigned Opc = MI.getOpcode();
+ auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
+ if (Idx < 0)
+ return;
+ if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
+ return;
+ // If the instruction already reads FRM, don't add another read.
+ if (MI.readsRegister(RISCV::FRM))
+ return;
+ MI.addOperand(
+ MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
+}
+
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
@@ -9400,6 +9514,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FCVT_XU_RTZ)
NODE_NAME_CASE(FCVT_W_RTZ_RV64)
NODE_NAME_CASE(FCVT_WU_RTZ_RV64)
+ NODE_NAME_CASE(STRICT_FCVT_W_RTZ_RV64)
+ NODE_NAME_CASE(STRICT_FCVT_WU_RTZ_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)
NODE_NAME_CASE(GREV)
NODE_NAME_CASE(GREVW)
@@ -9541,6 +9657,9 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
+ // TODO: Support fixed vectors up to XLen for P extension?
+ if (VT.isVector())
+ break;
return std::make_pair(0U, &RISCV::GPRRegClass);
case 'f':
if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
@@ -9553,17 +9672,15 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
default:
break;
}
- } else {
- if (Constraint == "vr") {
- for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
- &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
- if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
- return std::make_pair(0U, RC);
- }
- } else if (Constraint == "vm") {
- if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
- return std::make_pair(0U, &RISCV::VMRegClass);
+ } else if (Constraint == "vr") {
+ for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
+ &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
+ if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
+ return std::make_pair(0U, RC);
}
+ } else if (Constraint == "vm") {
+ if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
+ return std::make_pair(0U, &RISCV::VMV0RegClass);
}
// Clang will correctly decode the usage of register name aliases into their
@@ -10101,17 +10218,29 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
if (PartVTBitSize % ValueVTBitSize == 0) {
+ assert(PartVTBitSize >= ValueVTBitSize);
// If the element types are different, bitcast to the same element type of
// PartVT first.
+ // Give an example here, we want copy a <vscale x 1 x i8> value to
+ // <vscale x 4 x i16>.
+ // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
+ // subvector, then we can bitcast to <vscale x 4 x i16>.
if (ValueEltVT != PartEltVT) {
- unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
- assert(Count != 0 && "The number of element should not be zero.");
- EVT SameEltTypeVT =
- EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
- Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
+ if (PartVTBitSize > ValueVTBitSize) {
+ unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
+ assert(Count != 0 && "The number of element should not be zero.");
+ EVT SameEltTypeVT =
+ EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
+ Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
+ DAG.getUNDEF(SameEltTypeVT), Val,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else {
+ Val =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
+ Val, DAG.getVectorIdxConstant(0, DL));
}
- Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
- Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
Parts[0] = Val;
return true;
}
@@ -10141,19 +10270,23 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
if (PartVTBitSize % ValueVTBitSize == 0) {
+ assert(PartVTBitSize >= ValueVTBitSize);
EVT SameEltTypeVT = ValueVT;
// If the element types are different, convert it to the same element type
// of PartVT.
+ // Give an example here, we want copy a <vscale x 1 x i8> value from
+ // <vscale x 4 x i16>.
+ // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
+ // then we can extract <vscale x 1 x i8>.
if (ValueEltVT != PartEltVT) {
- unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
+ unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
assert(Count != 0 && "The number of element should not be zero.");
SameEltTypeVT =
- EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
+ EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
+ Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
}
- Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
- DAG.getConstant(0, DL, Subtarget.getXLenVT()));
- if (ValueEltVT != PartEltVT)
- Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
return Val;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 849928eb46ae..48c5ce730933 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -282,6 +282,11 @@ enum NodeType : unsigned {
// the value read before the modification and the new chain pointer.
SWAP_CSR,
+ // FP to 32 bit int conversions for RV64. These are used to keep track of the
+ // result being sign extended to 64 bit. These saturate out of range inputs.
+ STRICT_FCVT_W_RTZ_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCVT_WU_RTZ_RV64,
+
// Memory opcodes start here.
VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
VSE_VL,
@@ -315,7 +320,7 @@ public:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
- bool hasAndNot(SDValue Y) const override;
+ bool hasAndNotCompare(SDValue Y) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
@@ -383,6 +388,9 @@ public:
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
+ void AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const override;
+
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -593,6 +601,8 @@ private:
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
bool HasMask = true) const;
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const;
+ SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
+ unsigned VecOpc) const;
SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
unsigned ExtendOpc) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index cfad4cdb9364..6a16b6354f95 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -107,31 +107,44 @@ def Vcompress : RISCVVConstraint<!or(VS2Constraint.Value,
// The following opcode names match those given in Table 19.1 in the
// RISC-V User-level ISA specification ("RISC-V base opcode map").
-class RISCVOpcode<bits<7> val> {
+class RISCVOpcode<string name, bits<7> val> {
+ string Name = name;
bits<7> Value = val;
}
-def OPC_LOAD : RISCVOpcode<0b0000011>;
-def OPC_LOAD_FP : RISCVOpcode<0b0000111>;
-def OPC_MISC_MEM : RISCVOpcode<0b0001111>;
-def OPC_OP_IMM : RISCVOpcode<0b0010011>;
-def OPC_AUIPC : RISCVOpcode<0b0010111>;
-def OPC_OP_IMM_32 : RISCVOpcode<0b0011011>;
-def OPC_STORE : RISCVOpcode<0b0100011>;
-def OPC_STORE_FP : RISCVOpcode<0b0100111>;
-def OPC_AMO : RISCVOpcode<0b0101111>;
-def OPC_OP : RISCVOpcode<0b0110011>;
-def OPC_LUI : RISCVOpcode<0b0110111>;
-def OPC_OP_32 : RISCVOpcode<0b0111011>;
-def OPC_MADD : RISCVOpcode<0b1000011>;
-def OPC_MSUB : RISCVOpcode<0b1000111>;
-def OPC_NMSUB : RISCVOpcode<0b1001011>;
-def OPC_NMADD : RISCVOpcode<0b1001111>;
-def OPC_OP_FP : RISCVOpcode<0b1010011>;
-def OPC_OP_V : RISCVOpcode<0b1010111>;
-def OPC_BRANCH : RISCVOpcode<0b1100011>;
-def OPC_JALR : RISCVOpcode<0b1100111>;
-def OPC_JAL : RISCVOpcode<0b1101111>;
-def OPC_SYSTEM : RISCVOpcode<0b1110011>;
+def RISCVOpcodesList : GenericTable {
+ let FilterClass = "RISCVOpcode";
+ let Fields = [
+ "Name", "Value"
+ ];
+ let PrimaryKey = [ "Value" ];
+ let PrimaryKeyName = "lookupRISCVOpcodeByValue";
+}
+def lookupRISCVOpcodeByName : SearchIndex {
+ let Table = RISCVOpcodesList;
+ let Key = [ "Name" ];
+}
+def OPC_LOAD : RISCVOpcode<"LOAD", 0b0000011>;
+def OPC_LOAD_FP : RISCVOpcode<"LOAD_FP", 0b0000111>;
+def OPC_MISC_MEM : RISCVOpcode<"MISC_MEM", 0b0001111>;
+def OPC_OP_IMM : RISCVOpcode<"OP_IMM", 0b0010011>;
+def OPC_AUIPC : RISCVOpcode<"AUIPC", 0b0010111>;
+def OPC_OP_IMM_32 : RISCVOpcode<"OP_IMM_32", 0b0011011>;
+def OPC_STORE : RISCVOpcode<"STORE", 0b0100011>;
+def OPC_STORE_FP : RISCVOpcode<"STORE_FP", 0b0100111>;
+def OPC_AMO : RISCVOpcode<"AMO", 0b0101111>;
+def OPC_OP : RISCVOpcode<"OP", 0b0110011>;
+def OPC_LUI : RISCVOpcode<"LUI", 0b0110111>;
+def OPC_OP_32 : RISCVOpcode<"OP_32", 0b0111011>;
+def OPC_MADD : RISCVOpcode<"MADD", 0b1000011>;
+def OPC_MSUB : RISCVOpcode<"MSUB", 0b1000111>;
+def OPC_NMSUB : RISCVOpcode<"NMSUB", 0b1001011>;
+def OPC_NMADD : RISCVOpcode<"NMADD", 0b1001111>;
+def OPC_OP_FP : RISCVOpcode<"OP_FP", 0b1010011>;
+def OPC_OP_V : RISCVOpcode<"OP_V", 0b1010111>;
+def OPC_BRANCH : RISCVOpcode<"BRANCH", 0b1100011>;
+def OPC_JALR : RISCVOpcode<"JALR", 0b1100111>;
+def OPC_JAL : RISCVOpcode<"JAL", 0b1101111>;
+def OPC_SYSTEM : RISCVOpcode<"SYSTEM", 0b1110011>;
class RVInst<dag outs, dag ins, string opcodestr, string argstr,
list<dag> pattern, InstFormat format>
@@ -188,8 +201,7 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
// Pseudo instructions
class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = "">
- : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo>,
- Sched<[]> {
+ : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -265,14 +277,14 @@ class RVInstR4Frm<bits<2> funct2, RISCVOpcode opcode, dag outs, dag ins,
bits<5> rs3;
bits<5> rs2;
bits<5> rs1;
- bits<3> funct3;
+ bits<3> frm;
bits<5> rd;
let Inst{31-27} = rs3;
let Inst{26-25} = funct2;
let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
+ let Inst{14-12} = frm;
let Inst{11-7} = rd;
let Opcode = opcode.Value;
}
@@ -300,13 +312,13 @@ class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins,
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
bits<5> rs2;
bits<5> rs1;
- bits<3> funct3;
+ bits<3> frm;
bits<5> rd;
let Inst{31-25} = funct7;
let Inst{24-20} = rs2;
let Inst{19-15} = rs1;
- let Inst{14-12} = funct3;
+ let Inst{14-12} = frm;
let Inst{11-7} = rd;
let Opcode = opcode.Value;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
index 80f46b73bfd7..69e9d3553b30 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
@@ -45,19 +45,6 @@ def SUMOPUnitStride : RISCVLSUMOP<0b00000>;
def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
-class RISCVAMOOP<bits<5> val> {
- bits<5> Value = val;
-}
-def AMOOPVamoSwap : RISCVAMOOP<0b00001>;
-def AMOOPVamoAdd : RISCVAMOOP<0b00000>;
-def AMOOPVamoXor : RISCVAMOOP<0b00100>;
-def AMOOPVamoAnd : RISCVAMOOP<0b01100>;
-def AMOOPVamoOr : RISCVAMOOP<0b01000>;
-def AMOOPVamoMin : RISCVAMOOP<0b10000>;
-def AMOOPVamoMax : RISCVAMOOP<0b10100>;
-def AMOOPVamoMinu : RISCVAMOOP<0b11000>;
-def AMOOPVamoMaxu : RISCVAMOOP<0b11100>;
-
class RISCVWidth<bits<4> val> {
bits<4> Value = val;
}
@@ -342,22 +329,3 @@ class RVInstVSX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width,
let Uses = [VTYPE, VL];
}
-
-class RVInstVAMO<RISCVAMOOP amoop, bits<3> width, dag outs,
- dag ins, string opcodestr, string argstr>
- : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
- bits<5> vs2;
- bits<5> rs1;
- bit wd;
- bit vm;
-
- let Inst{31-27} = amoop.Value;
- let Inst{26} = wd;
- let Inst{25} = vm;
- let Inst{24-20} = vs2;
- let Inst{19-15} = rs1;
- let Inst{14-12} = width;
- let Opcode = OPC_AMO.Value;
-
- let Uses = [VTYPE, VL];
-}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 547d82550cac..2e2e00886d57 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -35,6 +35,7 @@ using namespace llvm;
#include "RISCVGenCompressInstEmitter.inc"
#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"
static cl::opt<bool> PreferWholeRegisterMove(
@@ -1059,6 +1060,7 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
+ case RISCV::FSGNJ_H:
// The canonical floating-point move is fsgnj rd, rs, rs.
return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
@@ -1087,6 +1089,7 @@ RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
+ case RISCV::FSGNJ_H:
// The canonical floating-point move is fsgnj rd, rs, rs.
if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
@@ -1254,7 +1257,7 @@ bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
// More accurate safety checking is done in getOutliningCandidateInfo.
- return true;
+ return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
}
// Enum values indicating how an outlined call should be constructed.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 2bfad7844c43..da0877c4299a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -18,6 +18,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#define GET_INSTRINFO_HEADER
+#define GET_INSTRINFO_OPERAND_ENUM
#include "RISCVGenInstrInfo.inc"
namespace llvm {
@@ -181,6 +182,10 @@ protected:
};
namespace RISCV {
+
+// Implemented in RISCVGenInstrInfo.inc
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
+
// Special immediate for AVL operand of V pseudo instructions to indicate VLMax.
static constexpr int64_t VLMaxSentinel = -1LL;
} // namespace RISCV
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 6f9cde966132..71eb6f01a4f4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -174,6 +174,20 @@ def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
let OperandNamespace = "RISCVOp";
}
+def InsnDirectiveOpcode : AsmOperandClass {
+ let Name = "InsnDirectiveOpcode";
+ let ParserMethod = "parseInsnDirectiveOpcode";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImm";
+}
+
+def uimm7_opcode : Operand<XLenVT> {
+ let ParserMatchClass = InsnDirectiveOpcode;
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_UIMM7";
+ let OperandNamespace = "RISCVOp";
+}
+
def uimm7 : Operand<XLenVT> {
let ParserMatchClass = UImmAsmOperand<7>;
let DecoderMethod = "decodeUImmOperand<7>";
@@ -878,35 +892,35 @@ def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>;
// isCodeGenOnly = 1 to hide them from the tablegened assembly parser.
let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1,
hasNoSchedulingInfo = 1 in {
-def InsnR : DirectiveInsnR<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3,
+def InsnR : DirectiveInsnR<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, uimm3:$funct3,
uimm7:$funct7, AnyReg:$rs1,
AnyReg:$rs2),
"$opcode, $funct3, $funct7, $rd, $rs1, $rs2">;
-def InsnR4 : DirectiveInsnR4<(outs AnyReg:$rd), (ins uimm7:$opcode,
+def InsnR4 : DirectiveInsnR4<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode,
uimm3:$funct3,
uimm2:$funct2,
AnyReg:$rs1, AnyReg:$rs2,
AnyReg:$rs3),
"$opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3">;
-def InsnI : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3,
+def InsnI : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, uimm3:$funct3,
AnyReg:$rs1, simm12:$imm12),
"$opcode, $funct3, $rd, $rs1, $imm12">;
-def InsnI_Mem : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode,
+def InsnI_Mem : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode,
uimm3:$funct3,
AnyReg:$rs1,
simm12:$imm12),
"$opcode, $funct3, $rd, ${imm12}(${rs1})">;
-def InsnB : DirectiveInsnB<(outs), (ins uimm7:$opcode, uimm3:$funct3,
+def InsnB : DirectiveInsnB<(outs), (ins uimm7_opcode:$opcode, uimm3:$funct3,
AnyReg:$rs1, AnyReg:$rs2,
simm13_lsb0:$imm12),
"$opcode, $funct3, $rs1, $rs2, $imm12">;
-def InsnU : DirectiveInsnU<(outs AnyReg:$rd), (ins uimm7:$opcode,
+def InsnU : DirectiveInsnU<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode,
uimm20_lui:$imm20),
"$opcode, $rd, $imm20">;
-def InsnJ : DirectiveInsnJ<(outs AnyReg:$rd), (ins uimm7:$opcode,
+def InsnJ : DirectiveInsnJ<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode,
simm21_lsb0_jal:$imm20),
"$opcode, $rd, $imm20">;
-def InsnS : DirectiveInsnS<(outs), (ins uimm7:$opcode, uimm3:$funct3,
+def InsnS : DirectiveInsnS<(outs), (ins uimm7_opcode:$opcode, uimm3:$funct3,
AnyReg:$rs2, AnyReg:$rs1,
simm12:$imm12),
"$opcode, $funct3, $rs2, ${imm12}(${rs1})">;
@@ -918,37 +932,37 @@ def InsnS : DirectiveInsnS<(outs), (ins uimm7:$opcode, uimm3:$funct3,
// for known formats.
let EmitPriority = 0 in {
def : InstAlias<".insn_r $opcode, $funct3, $funct7, $rd, $rs1, $rs2",
- (InsnR AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm7:$funct7,
+ (InsnR AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, uimm7:$funct7,
AnyReg:$rs1, AnyReg:$rs2)>;
// Accept 4 register form of ".insn r" as alias for ".insn r4".
def : InstAlias<".insn_r $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3",
- (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2,
+ (InsnR4 AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, uimm2:$funct2,
AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>;
def : InstAlias<".insn_r4 $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3",
- (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2,
+ (InsnR4 AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, uimm2:$funct2,
AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>;
def : InstAlias<".insn_i $opcode, $funct3, $rd, $rs1, $imm12",
- (InsnI AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ (InsnI AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1,
simm12:$imm12)>;
def : InstAlias<".insn_i $opcode, $funct3, $rd, ${imm12}(${rs1})",
- (InsnI_Mem AnyReg:$rd, uimm7:$opcode, uimm3:$funct3,
+ (InsnI_Mem AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3,
AnyReg:$rs1, simm12:$imm12)>;
def : InstAlias<".insn_b $opcode, $funct3, $rs1, $rs2, $imm12",
- (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ (InsnB uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1,
AnyReg:$rs2, simm13_lsb0:$imm12)>;
// Accept sb as an alias for b.
def : InstAlias<".insn_sb $opcode, $funct3, $rs1, $rs2, $imm12",
- (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ (InsnB uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1,
AnyReg:$rs2, simm13_lsb0:$imm12)>;
def : InstAlias<".insn_u $opcode, $rd, $imm20",
- (InsnU AnyReg:$rd, uimm7:$opcode, uimm20_lui:$imm20)>;
+ (InsnU AnyReg:$rd, uimm7_opcode:$opcode, uimm20_lui:$imm20)>;
def : InstAlias<".insn_j $opcode, $rd, $imm20",
- (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>;
+ (InsnJ AnyReg:$rd, uimm7_opcode:$opcode, simm21_lsb0_jal:$imm20)>;
// Accept uj as an alias for j.
def : InstAlias<".insn_uj $opcode, $rd, $imm20",
- (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>;
+ (InsnJ AnyReg:$rd, uimm7_opcode:$opcode, simm21_lsb0_jal:$imm20)>;
def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
- (InsnS uimm7:$opcode, uimm3:$funct3, AnyReg:$rs2,
+ (InsnS uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs2,
AnyReg:$rs1, simm12:$imm12)>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 2cd011a02345..d6c31c4804db 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -26,41 +26,6 @@ def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>;
def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>;
//===----------------------------------------------------------------------===//
-// Instruction Class Templates
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPFMAD_rrr_frm<RISCVOpcode opcode, string opcodestr>
- : RVInstR4Frm<0b01, opcode, (outs FPR64:$rd),
- (ins FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, frmarg:$funct3),
- opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">;
-
-class FPFMADDynFrmAlias<FPFMAD_rrr_frm Inst, string OpcodeStr>
- : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
- (Inst FPR64:$rd, FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUD_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
- : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR64:$rd),
- (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUD_rr_frm<bits<7> funct7, string opcodestr>
- : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR64:$rd),
- (ins FPR64:$rs1, FPR64:$rs2, frmarg:$funct3), opcodestr,
- "$rd, $rs1, $rs2, $funct3">;
-
-class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr>
- : InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
- (Inst FPR64:$rd, FPR64:$rs1, FPR64:$rs2, 0b111)>;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPCmpD_rr<bits<3> funct3, string opcodestr>
- : RVInstR<0b1010001, funct3, OPC_OP_FP, (outs GPR:$rd),
- (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">,
- Sched<[WriteFCmp64, ReadFCmp64, ReadFCmp64]>;
-
-//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -81,126 +46,104 @@ def FSD : RVInstS<0b011, OPC_STORE_FP, (outs),
"fsd", "$rs2, ${imm12}(${rs1})">,
Sched<[WriteFST64, ReadStoreData, ReadFMemBase]>;
-def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">,
- Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>;
-def : FPFMADDynFrmAlias<FMADD_D, "fmadd.d">;
-def FMSUB_D : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">,
- Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>;
-def : FPFMADDynFrmAlias<FMSUB_D, "fmsub.d">;
-def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">,
- Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>;
-def : FPFMADDynFrmAlias<FNMSUB_D, "fnmsub.d">;
-def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">,
- Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>;
-def : FPFMADDynFrmAlias<FNMADD_D, "fnmadd.d">;
+let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
+def FMADD_D : FPFMA_rrr_frm<OPC_MADD, 0b01, "fmadd.d", FPR64>;
+def FMSUB_D : FPFMA_rrr_frm<OPC_MSUB, 0b01, "fmsub.d", FPR64>;
+def FNMSUB_D : FPFMA_rrr_frm<OPC_NMSUB, 0b01, "fnmsub.d", FPR64>;
+def FNMADD_D : FPFMA_rrr_frm<OPC_NMADD, 0b01, "fnmadd.d", FPR64>;
+}
+
+def : FPFMADynFrmAlias<FMADD_D, "fmadd.d", FPR64>;
+def : FPFMADynFrmAlias<FMSUB_D, "fmsub.d", FPR64>;
+def : FPFMADynFrmAlias<FNMSUB_D, "fnmsub.d", FPR64>;
+def : FPFMADynFrmAlias<FNMADD_D, "fnmadd.d", FPR64>;
-def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">,
+def FADD_D : FPALU_rr_frm<0b0000001, "fadd.d", FPR64>,
Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
-def : FPALUDDynFrmAlias<FADD_D, "fadd.d">;
-def FSUB_D : FPALUD_rr_frm<0b0000101, "fsub.d">,
+def FSUB_D : FPALU_rr_frm<0b0000101, "fsub.d", FPR64>,
Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
-def : FPALUDDynFrmAlias<FSUB_D, "fsub.d">;
-def FMUL_D : FPALUD_rr_frm<0b0001001, "fmul.d">,
+def FMUL_D : FPALU_rr_frm<0b0001001, "fmul.d", FPR64>,
Sched<[WriteFMul64, ReadFMul64, ReadFMul64]>;
-def : FPALUDDynFrmAlias<FMUL_D, "fmul.d">;
-def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">,
+def FDIV_D : FPALU_rr_frm<0b0001101, "fdiv.d", FPR64>,
Sched<[WriteFDiv64, ReadFDiv64, ReadFDiv64]>;
-def : FPALUDDynFrmAlias<FDIV_D, "fdiv.d">;
-def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">,
- Sched<[WriteFSqrt64, ReadFSqrt64]> {
- let rs2 = 0b00000;
-}
-def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>;
+def : FPALUDynFrmAlias<FADD_D, "fadd.d", FPR64>;
+def : FPALUDynFrmAlias<FSUB_D, "fsub.d", FPR64>;
+def : FPALUDynFrmAlias<FMUL_D, "fmul.d", FPR64>;
+def : FPALUDynFrmAlias<FDIV_D, "fdiv.d", FPR64>;
-def FSGNJ_D : FPALUD_rr<0b0010001, 0b000, "fsgnj.d">,
- Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
-def FSGNJN_D : FPALUD_rr<0b0010001, 0b001, "fsgnjn.d">,
- Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
-def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">,
- Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
-def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">,
- Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>;
-def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">,
- Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>;
+def FSQRT_D : FPUnaryOp_r_frm<0b0101101, 0b00000, FPR64, FPR64, "fsqrt.d">,
+ Sched<[WriteFSqrt64, ReadFSqrt64]>;
+def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>;
-def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">,
- Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> {
- let rs2 = 0b00001;
+let SchedRW = [WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64],
+ mayRaiseFPException = 0 in {
+def FSGNJ_D : FPALU_rr<0b0010001, 0b000, "fsgnj.d", FPR64>;
+def FSGNJN_D : FPALU_rr<0b0010001, 0b001, "fsgnjn.d", FPR64>;
+def FSGNJX_D : FPALU_rr<0b0010001, 0b010, "fsgnjx.d", FPR64>;
}
-def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>;
-def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s">,
- Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]> {
- let rs2 = 0b00000;
+let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
+def FMIN_D : FPALU_rr<0b0010101, 0b000, "fmin.d", FPR64>;
+def FMAX_D : FPALU_rr<0b0010101, 0b001, "fmax.d", FPR64>;
}
-def FEQ_D : FPCmpD_rr<0b010, "feq.d">;
-def FLT_D : FPCmpD_rr<0b001, "flt.d">;
-def FLE_D : FPCmpD_rr<0b000, "fle.d">;
+def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, 0b00001, FPR32, FPR64, "fcvt.s.d">,
+ Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>;
+def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>;
+
+def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b00000, 0b000, FPR64, FPR32, "fcvt.d.s">,
+ Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
-def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d">,
- Sched<[WriteFClass64, ReadFClass64]> {
- let rs2 = 0b00000;
+let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
+def FEQ_D : FPCmp_rr<0b1010001, 0b010, "feq.d", FPR64>;
+def FLT_D : FPCmp_rr<0b1010001, 0b001, "flt.d", FPR64>;
+def FLE_D : FPCmp_rr<0b1010001, 0b000, "fle.d", FPR64>;
}
-def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d">,
- Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> {
- let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FCLASS_D : FPUnaryOp_r<0b1110001, 0b00000, 0b001, GPR, FPR64, "fclass.d">,
+ Sched<[WriteFClass64, ReadFClass64]>;
+
+def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, 0b00000, GPR, FPR64, "fcvt.w.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
def : FPUnaryOpDynFrmAlias<FCVT_W_D, "fcvt.w.d", GPR, FPR64>;
-def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d">,
- Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> {
- let rs2 = 0b00001;
-}
+def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, 0b00001, GPR, FPR64, "fcvt.wu.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
def : FPUnaryOpDynFrmAlias<FCVT_WU_D, "fcvt.wu.d", GPR, FPR64>;
-def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w">,
- Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> {
- let rs2 = 0b00000;
-}
+def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b00000, 0b000, FPR64, GPR, "fcvt.d.w">,
+ Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
-def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu">,
- Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> {
- let rs2 = 0b00001;
-}
+def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b00001, 0b000, FPR64, GPR, "fcvt.d.wu">,
+ Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
} // Predicates = [HasStdExtD]
let Predicates = [HasStdExtD, IsRV64] in {
-def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d">,
- Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> {
- let rs2 = 0b00010;
-}
+def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, 0b00010, GPR, FPR64, "fcvt.l.d">,
+ Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
def : FPUnaryOpDynFrmAlias<FCVT_L_D, "fcvt.l.d", GPR, FPR64>;
-def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d">,
- Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> {
- let rs2 = 0b00011;
-}
+def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, 0b00011, GPR, FPR64, "fcvt.lu.d">,
+ Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
def : FPUnaryOpDynFrmAlias<FCVT_LU_D, "fcvt.lu.d", GPR, FPR64>;
-def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d">,
- Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]> {
- let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FMV_X_D : FPUnaryOp_r<0b1110001, 0b00000, 0b000, GPR, FPR64, "fmv.x.d">,
+ Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>;
-def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l">,
- Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> {
- let rs2 = 0b00010;
-}
+def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, 0b00010, FPR64, GPR, "fcvt.d.l">,
+ Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
def : FPUnaryOpDynFrmAlias<FCVT_D_L, "fcvt.d.l", FPR64, GPR>;
-def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu">,
- Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> {
- let rs2 = 0b00011;
-}
+def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, 0b00011, FPR64, GPR, "fcvt.d.lu">,
+ Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
def : FPUnaryOpDynFrmAlias<FCVT_D_LU, "fcvt.d.lu", FPR64, GPR>;
-def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x">,
- Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]> {
- let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FMV_D_X : FPUnaryOp_r<0b1111001, 0b00000, 0b000, FPR64, GPR, "fmv.d.x">,
+ Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]>;
} // Predicates = [HasStdExtD, IsRV64]
//===----------------------------------------------------------------------===//
@@ -241,20 +184,20 @@ let Predicates = [HasStdExtD] in {
/// Float conversion operations
// f64 -> f32, f32 -> f64
-def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>;
-def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
+def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>;
+def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
// are defined later.
/// Float arithmetic operations
-def : PatFpr64Fpr64DynFrm<fadd, FADD_D>;
-def : PatFpr64Fpr64DynFrm<fsub, FSUB_D>;
-def : PatFpr64Fpr64DynFrm<fmul, FMUL_D>;
-def : PatFpr64Fpr64DynFrm<fdiv, FDIV_D>;
+def : PatFpr64Fpr64DynFrm<any_fadd, FADD_D>;
+def : PatFpr64Fpr64DynFrm<any_fsub, FSUB_D>;
+def : PatFpr64Fpr64DynFrm<any_fmul, FMUL_D>;
+def : PatFpr64Fpr64DynFrm<any_fdiv, FDIV_D>;
-def : Pat<(fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, 0b111)>;
+def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, 0b111)>;
def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>;
def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
@@ -266,19 +209,19 @@ def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2,
0b111))>;
// fmadd: rs1 * rs2 + rs3
-def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3),
+def : Pat<(any_fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3),
(FMADD_D $rs1, $rs2, $rs3, 0b111)>;
// fmsub: rs1 * rs2 - rs3
-def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
+def : Pat<(any_fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
(FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
// fnmsub: -rs1 * rs2 + rs3
-def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
+def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
(FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
// fnmadd: -rs1 * rs2 - rs3
-def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
+def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
(FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
@@ -328,8 +271,8 @@ let Predicates = [HasStdExtD, IsRV32] in {
def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>;
// double->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>;
-def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>;
// Saturating double->[u]int32.
def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>;
@@ -342,8 +285,8 @@ def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>;
def : Pat<(i32 (lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>;
// [u]int->double.
-def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>;
-def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>;
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>;
} // Predicates = [HasStdExtD, IsRV32]
let Predicates = [HasStdExtD, IsRV64] in {
@@ -358,20 +301,20 @@ def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
// [u]int32->fp
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;
-def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>;
// Saturating double->[u]int64.
def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>;
def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>;
// double->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>;
-def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>;
// double->int64 with current rounding mode.
def : Pat<(i64 (lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>;
@@ -382,6 +325,6 @@ def : Pat<(i64 (lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
def : Pat<(i64 (llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
// [u]int64->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>;
-def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>;
} // Predicates = [HasStdExtD, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 3400c3be52bf..bb45ed859442 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -19,9 +19,9 @@ def SDT_RISCVFMV_W_X_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
def SDT_RISCVFMV_X_ANYEXTW_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
-def STD_RISCVFCVT_W_RV64
+def SDT_RISCVFCVT_W_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>;
-def STD_RISCVFCVT_X
+def SDT_RISCVFCVT_X
: SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
def riscv_fmv_w_x_rv64
@@ -29,13 +29,27 @@ def riscv_fmv_w_x_rv64
def riscv_fmv_x_anyextw_rv64
: SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
def riscv_fcvt_w_rtz_rv64
- : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>;
+ : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64>;
def riscv_fcvt_wu_rtz_rv64
- : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>;
+ : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64>;
def riscv_fcvt_x_rtz
- : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>;
+ : SDNode<"RISCVISD::FCVT_X_RTZ", SDT_RISCVFCVT_X>;
def riscv_fcvt_xu_rtz
- : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>;
+ : SDNode<"RISCVISD::FCVT_XU_RTZ", SDT_RISCVFCVT_X>;
+
+def riscv_strict_fcvt_w_rtz_rv64
+ : SDNode<"RISCVISD::STRICT_FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64,
+ [SDNPHasChain]>;
+def riscv_strict_fcvt_wu_rtz_rv64
+ : SDNode<"RISCVISD::STRICT_FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64,
+ [SDNPHasChain]>;
+
+def riscv_any_fcvt_w_rtz_rv64 : PatFrags<(ops node:$src),
+ [(riscv_strict_fcvt_w_rtz_rv64 node:$src),
+ (riscv_fcvt_w_rtz_rv64 node:$src)]>;
+def riscv_any_fcvt_wu_rtz_rv64 : PatFrags<(ops node:$src),
+ [(riscv_strict_fcvt_wu_rtz_rv64 node:$src),
+ (riscv_fcvt_wu_rtz_rv64 node:$src)]>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -59,54 +73,65 @@ def frmarg : Operand<XLenVT> {
// Instruction class templates
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPFMAS_rrr_frm<RISCVOpcode opcode, string opcodestr>
- : RVInstR4Frm<0b00, opcode, (outs FPR32:$rd),
- (ins FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, frmarg:$funct3),
- opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
+ UseNamedOperandTable = 1, hasPostISelHook = 1 in
+class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr,
+ RegisterClass rty>
+ : RVInstR4Frm<funct2, opcode, (outs rty:$rd),
+ (ins rty:$rs1, rty:$rs2, rty:$rs3, frmarg:$frm),
+ opcodestr, "$rd, $rs1, $rs2, $rs3, $frm">;
-class FPFMASDynFrmAlias<FPFMAS_rrr_frm Inst, string OpcodeStr>
+class FPFMADynFrmAlias<FPFMA_rrr_frm Inst, string OpcodeStr,
+ RegisterClass rty>
: InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
- (Inst FPR32:$rd, FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+ (Inst rty:$rd, rty:$rs1, rty:$rs2, rty:$rs3, 0b111)>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUS_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
- : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR32:$rd),
- (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
+class FPALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
+ RegisterClass rty>
+ : RVInstR<funct7, funct3, OPC_OP_FP, (outs rty:$rd),
+ (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUS_rr_frm<bits<7> funct7, string opcodestr>
- : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR32:$rd),
- (ins FPR32:$rs1, FPR32:$rs2, frmarg:$funct3), opcodestr,
- "$rd, $rs1, $rs2, $funct3">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
+ UseNamedOperandTable = 1, hasPostISelHook = 1 in
+class FPALU_rr_frm<bits<7> funct7, string opcodestr, RegisterClass rty>
+ : RVInstRFrm<funct7, OPC_OP_FP, (outs rty:$rd),
+ (ins rty:$rs1, rty:$rs2, frmarg:$frm), opcodestr,
+ "$rd, $rs1, $rs2, $frm">;
-class FPALUSDynFrmAlias<FPALUS_rr_frm Inst, string OpcodeStr>
+class FPALUDynFrmAlias<FPALU_rr_frm Inst, string OpcodeStr,
+ RegisterClass rty>
: InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
- (Inst FPR32:$rd, FPR32:$rs1, FPR32:$rs2, 0b111)>;
+ (Inst rty:$rd, rty:$rs1, rty:$rs2, 0b111)>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPUnaryOp_r<bits<7> funct7, bits<3> funct3, RegisterClass rdty,
- RegisterClass rs1ty, string opcodestr>
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
+class FPUnaryOp_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
+ RegisterClass rdty, RegisterClass rs1ty, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd), (ins rs1ty:$rs1),
- opcodestr, "$rd, $rs1">;
+ opcodestr, "$rd, $rs1"> {
+ let rs2 = rs2val;
+}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPUnaryOp_r_frm<bits<7> funct7, RegisterClass rdty, RegisterClass rs1ty,
- string opcodestr>
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
+ UseNamedOperandTable = 1, hasPostISelHook = 1 in
+class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, RegisterClass rdty,
+ RegisterClass rs1ty, string opcodestr>
: RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd),
- (ins rs1ty:$rs1, frmarg:$funct3), opcodestr,
- "$rd, $rs1, $funct3">;
+ (ins rs1ty:$rs1, frmarg:$frm), opcodestr,
+ "$rd, $rs1, $frm"> {
+ let rs2 = rs2val;
+}
class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr,
RegisterClass rdty, RegisterClass rs1ty>
: InstAlias<OpcodeStr#" $rd, $rs1",
(Inst rdty:$rd, rs1ty:$rs1, 0b111)>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPCmpS_rr<bits<3> funct3, string opcodestr>
- : RVInstR<0b1010000, funct3, OPC_OP_FP, (outs GPR:$rd),
- (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">,
- Sched<[WriteFCmp32, ReadFCmp32, ReadFCmp32]>;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
+class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
+ RegisterClass rty>
+ : RVInstR<funct7, funct3, OPC_OP_FP, (outs GPR:$rd),
+ (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
//===----------------------------------------------------------------------===//
// Instructions
@@ -128,116 +153,98 @@ def FSW : RVInstS<0b010, OPC_STORE_FP, (outs),
"fsw", "$rs2, ${imm12}(${rs1})">,
Sched<[WriteFST32, ReadStoreData, ReadFMemBase]>;
-def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">,
- Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>;
-def : FPFMASDynFrmAlias<FMADD_S, "fmadd.s">;
-def FMSUB_S : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">,
- Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>;
-def : FPFMASDynFrmAlias<FMSUB_S, "fmsub.s">;
-def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">,
- Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>;
-def : FPFMASDynFrmAlias<FNMSUB_S, "fnmsub.s">;
-def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">,
- Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>;
-def : FPFMASDynFrmAlias<FNMADD_S, "fnmadd.s">;
+let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
+def FMADD_S : FPFMA_rrr_frm<OPC_MADD, 0b00, "fmadd.s", FPR32>;
+def FMSUB_S : FPFMA_rrr_frm<OPC_MSUB, 0b00, "fmsub.s", FPR32>;
+def FNMSUB_S : FPFMA_rrr_frm<OPC_NMSUB, 0b00, "fnmsub.s", FPR32>;
+def FNMADD_S : FPFMA_rrr_frm<OPC_NMADD, 0b00, "fnmadd.s", FPR32>;
+}
+
+def : FPFMADynFrmAlias<FMADD_S, "fmadd.s", FPR32>;
+def : FPFMADynFrmAlias<FMSUB_S, "fmsub.s", FPR32>;
+def : FPFMADynFrmAlias<FNMSUB_S, "fnmsub.s", FPR32>;
+def : FPFMADynFrmAlias<FNMADD_S, "fnmadd.s", FPR32>;
-def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">,
+def FADD_S : FPALU_rr_frm<0b0000000, "fadd.s", FPR32>,
Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
-def : FPALUSDynFrmAlias<FADD_S, "fadd.s">;
-def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">,
+def FSUB_S : FPALU_rr_frm<0b0000100, "fsub.s", FPR32>,
Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
-def : FPALUSDynFrmAlias<FSUB_S, "fsub.s">;
-def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">,
+def FMUL_S : FPALU_rr_frm<0b0001000, "fmul.s", FPR32>,
Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>;
-def : FPALUSDynFrmAlias<FMUL_S, "fmul.s">;
-def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">,
+def FDIV_S : FPALU_rr_frm<0b0001100, "fdiv.s", FPR32>,
Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>;
-def : FPALUSDynFrmAlias<FDIV_S, "fdiv.s">;
-def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">,
- Sched<[WriteFSqrt32, ReadFSqrt32]> {
- let rs2 = 0b00000;
-}
+def : FPALUDynFrmAlias<FADD_S, "fadd.s", FPR32>;
+def : FPALUDynFrmAlias<FSUB_S, "fsub.s", FPR32>;
+def : FPALUDynFrmAlias<FMUL_S, "fmul.s", FPR32>;
+def : FPALUDynFrmAlias<FDIV_S, "fdiv.s", FPR32>;
+
+def FSQRT_S : FPUnaryOp_r_frm<0b0101100, 0b00000, FPR32, FPR32, "fsqrt.s">,
+ Sched<[WriteFSqrt32, ReadFSqrt32]>;
def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>;
-def FSGNJ_S : FPALUS_rr<0b0010000, 0b000, "fsgnj.s">,
- Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
-def FSGNJN_S : FPALUS_rr<0b0010000, 0b001, "fsgnjn.s">,
- Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
-def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">,
- Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
-def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">,
- Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>;
-def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">,
- Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>;
+let SchedRW = [WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32],
+ mayRaiseFPException = 0 in {
+def FSGNJ_S : FPALU_rr<0b0010000, 0b000, "fsgnj.s", FPR32>;
+def FSGNJN_S : FPALU_rr<0b0010000, 0b001, "fsgnjn.s", FPR32>;
+def FSGNJX_S : FPALU_rr<0b0010000, 0b010, "fsgnjx.s", FPR32>;
+}
-def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">,
- Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {
- let rs2 = 0b00000;
+let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
+def FMIN_S : FPALU_rr<0b0010100, 0b000, "fmin.s", FPR32>;
+def FMAX_S : FPALU_rr<0b0010100, 0b001, "fmax.s", FPR32>;
}
+
+def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, 0b00000, GPR, FPR32, "fcvt.w.s">,
+ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
def : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>;
-def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s">,
- Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {
- let rs2 = 0b00001;
-}
+def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, 0b00001, GPR, FPR32, "fcvt.wu.s">,
+ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
def : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>;
-def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w">,
- Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]> {
- let rs2 = 0b00000;
-}
-
-def FEQ_S : FPCmpS_rr<0b010, "feq.s">;
-def FLT_S : FPCmpS_rr<0b001, "flt.s">;
-def FLE_S : FPCmpS_rr<0b000, "fle.s">;
+let mayRaiseFPException = 0 in
+def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32, "fmv.x.w">,
+ Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
-def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s">,
- Sched<[WriteFClass32, ReadFClass32]> {
- let rs2 = 0b00000;
+let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in {
+def FEQ_S : FPCmp_rr<0b1010000, 0b010, "feq.s", FPR32>;
+def FLT_S : FPCmp_rr<0b1010000, 0b001, "flt.s", FPR32>;
+def FLE_S : FPCmp_rr<0b1010000, 0b000, "fle.s", FPR32>;
}
-def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w">,
- Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> {
- let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FCLASS_S : FPUnaryOp_r<0b1110000, 0b00000, 0b001, GPR, FPR32, "fclass.s">,
+ Sched<[WriteFClass32, ReadFClass32]>;
+
+def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, 0b00000, FPR32, GPR, "fcvt.s.w">,
+ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
def : FPUnaryOpDynFrmAlias<FCVT_S_W, "fcvt.s.w", FPR32, GPR>;
-def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu">,
- Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> {
- let rs2 = 0b00001;
-}
+def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, 0b00001, FPR32, GPR, "fcvt.s.wu">,
+ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
def : FPUnaryOpDynFrmAlias<FCVT_S_WU, "fcvt.s.wu", FPR32, GPR>;
-def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x">,
- Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]> {
- let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">,
+ Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>;
} // Predicates = [HasStdExtF]
let Predicates = [HasStdExtF, IsRV64] in {
-def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s">,
- Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> {
- let rs2 = 0b00010;
-}
+def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, 0b00010, GPR, FPR32, "fcvt.l.s">,
+ Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
def : FPUnaryOpDynFrmAlias<FCVT_L_S, "fcvt.l.s", GPR, FPR32>;
-def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s">,
- Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> {
- let rs2 = 0b00011;
-}
+def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, 0b00011, GPR, FPR32, "fcvt.lu.s">,
+ Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
def : FPUnaryOpDynFrmAlias<FCVT_LU_S, "fcvt.lu.s", GPR, FPR32>;
-def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l">,
- Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> {
- let rs2 = 0b00010;
-}
+def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, 0b00010, FPR32, GPR, "fcvt.s.l">,
+ Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
def : FPUnaryOpDynFrmAlias<FCVT_S_L, "fcvt.s.l", FPR32, GPR>;
-def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu">,
- Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> {
- let rs2 = 0b00011;
-}
+def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, 0b00011, FPR32, GPR, "fcvt.s.lu">,
+ Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
def : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>;
} // Predicates = [HasStdExtF, IsRV64]
@@ -320,12 +327,12 @@ def : Pat<(f32 (fpimm0)), (FMV_W_X X0)>;
/// Float arithmetic operations
-def : PatFpr32Fpr32DynFrm<fadd, FADD_S>;
-def : PatFpr32Fpr32DynFrm<fsub, FSUB_S>;
-def : PatFpr32Fpr32DynFrm<fmul, FMUL_S>;
-def : PatFpr32Fpr32DynFrm<fdiv, FDIV_S>;
+def : PatFpr32Fpr32DynFrm<any_fadd, FADD_S>;
+def : PatFpr32Fpr32DynFrm<any_fsub, FSUB_S>;
+def : PatFpr32Fpr32DynFrm<any_fmul, FMUL_S>;
+def : PatFpr32Fpr32DynFrm<any_fdiv, FDIV_S>;
-def : Pat<(fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, 0b111)>;
+def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, 0b111)>;
def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>;
def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
@@ -334,19 +341,19 @@ def : PatFpr32Fpr32<fcopysign, FSGNJ_S>;
def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
// fmadd: rs1 * rs2 + rs3
-def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
+def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
(FMADD_S $rs1, $rs2, $rs3, 0b111)>;
// fmsub: rs1 * rs2 - rs3
-def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
+def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
(FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
// fnmsub: -rs1 * rs2 + rs3
-def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
+def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
(FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
// fnmadd: -rs1 * rs2 - rs3
-def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
+def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
(FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
@@ -382,8 +389,8 @@ def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>;
def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
// float->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
// Saturating float->[u]int32.
def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
@@ -396,8 +403,8 @@ def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>;
def : Pat<(i32 (lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>;
// [u]int->float. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>;
} // Predicates = [HasStdExtF, IsRV32]
let Predicates = [HasStdExtF, IsRV64] in {
@@ -410,12 +417,12 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32),
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
// float->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
-def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
// Saturating float->[u]int64.
def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
@@ -430,8 +437,8 @@ def : Pat<(i64 (lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
def : Pat<(i64 (llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>;
-def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>;
} // Predicates = [HasStdExtF, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index a037dbf585ce..b62e23d3b0fa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -96,14 +96,6 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
(REMW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtM, IsRV64]
-// Pattern to detect constants with no more than 32 active bits that can't
-// be materialized with lui+addiw.
-def uimm32_not_simm32 : PatLeaf<(XLenVT GPR:$a), [{
- auto *C = dyn_cast<ConstantSDNode>(N);
- return C && C->hasOneUse() && isUInt<32>(C->getZExtValue()) &&
- !isInt<32>(C->getSExtValue());
-}]>;
-
let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
// Special case for calculating the full 64-bit product of a 32x32 unsigned
// multiply where the inputs aren't known to be zero extended. We can shift the
@@ -111,9 +103,4 @@ let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
// zeroing the upper 32 bits.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
-// The RHS could also be a constant that is hard to materialize. By shifting
-// left we can allow constant materialization to use LUI+ADDIW via
-// hasAllWUsers.
-def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), uimm32_not_simm32:$rs2)),
- (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 3d5f9bc54731..173ae43a08d6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -338,29 +338,6 @@ class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in {
-// vamo vd, (rs1), vs2, vd, vm
-class VAMOWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr>
- : RVInstVAMO<amoop, width.Value{2-0}, (outs VR:$vd_wd),
- (ins GPR:$rs1, VR:$vs2, VR:$vd, VMaskOp:$vm),
- opcodestr, "$vd_wd, (${rs1}), $vs2, $vd$vm"> {
- let Constraints = "$vd_wd = $vd";
- let wd = 1;
- bits<5> vd;
- let Inst{11-7} = vd;
-}
-
-// vamo x0, (rs1), vs2, vs3, vm
-class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr>
- : RVInstVAMO<amoop, width.Value{2-0}, (outs),
- (ins GPR:$rs1, VR:$vs2, VR:$vs3, VMaskOp:$vm),
- opcodestr, "x0, (${rs1}), $vs2, $vs3$vm"> {
- bits<5> vs3;
- let Inst{11-7} = vs3;
-}
-
-} // hasSideEffects = 0, mayLoad = 1, mayStore = 1
-
//===----------------------------------------------------------------------===//
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
@@ -779,11 +756,6 @@ multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>;
}
-multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
- def _WD : VAMOWd<amoop, width, opcodestr>;
- def _UNWD : VAMONoWd<amoop, width, opcodestr>;
-}
-
multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
foreach l = [8, 16, 32, 64] in {
defvar w = !cast<RISCVWidth>("LSWidth" # l);
@@ -822,7 +794,7 @@ foreach eew = [8, 16, 32, 64] in {
// Vector Strided Instructions
def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>;
def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>;
-
+
// Vector Indexed Instructions
def VLUXEI#eew#_V :
VIndexedLoad<MOPLDIndexedUnord, w, "vluxei"#eew#".v">, VLXSched<eew, "U">;
@@ -1416,13 +1388,20 @@ defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
-foreach n = [1, 2, 4, 8] in {
- def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd),
- (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">,
- VMVRSched<n> {
+def VMV1R_V : RVInstV<0b100111, 0, OPIVI, (outs VR:$vd), (ins VR:$vs2),
+ "vmv1r.v", "$vd, $vs2">, VMVRSched<1> {
let Uses = [];
let vm = 1;
}
+// A future extension may relax the vector register alignment restrictions.
+foreach n = [2, 4, 8] in {
+ defvar vrc = !cast<VReg>("VRM"#n);
+ def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs vrc:$vd),
+ (ins vrc:$vs2), "vmv" # n # "r.v", "$vd, $vs2">,
+ VMVRSched<n> {
+ let Uses = [];
+ let vm = 1;
+ }
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
} // Predicates = [HasStdExtV]
@@ -1462,31 +1441,4 @@ let Predicates = [HasStdExtZvlsseg] in {
}
} // Predicates = [HasStdExtZvlsseg]
-let Predicates = [HasStdExtZvamo, HasStdExtA] in {
- foreach eew = [8, 16, 32] in {
- defvar w = !cast<RISCVWidth>("LSWidth"#eew);
- defm VAMOSWAPEI#eew : VAMO<AMOOPVamoSwap, w, "vamoswapei"#eew#".v">;
- defm VAMOADDEI#eew : VAMO<AMOOPVamoAdd, w, "vamoaddei"#eew#".v">;
- defm VAMOXOREI#eew : VAMO<AMOOPVamoXor, w, "vamoxorei"#eew#".v">;
- defm VAMOANDEI#eew : VAMO<AMOOPVamoAnd, w, "vamoandei"#eew#".v">;
- defm VAMOOREI#eew : VAMO<AMOOPVamoOr, w, "vamoorei"#eew#".v">;
- defm VAMOMINEI#eew : VAMO<AMOOPVamoMin, w, "vamominei"#eew#".v">;
- defm VAMOMAXEI#eew : VAMO<AMOOPVamoMax, w, "vamomaxei"#eew#".v">;
- defm VAMOMINUEI#eew : VAMO<AMOOPVamoMinu, w, "vamominuei"#eew#".v">;
- defm VAMOMAXUEI#eew : VAMO<AMOOPVamoMaxu, w, "vamomaxuei"#eew#".v">;
- }
-} // Predicates = [HasStdExtZvamo, HasStdExtA]
-
-let Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] in {
- defm VAMOSWAPEI64 : VAMO<AMOOPVamoSwap, LSWidth64, "vamoswapei64.v">;
- defm VAMOADDEI64 : VAMO<AMOOPVamoAdd, LSWidth64, "vamoaddei64.v">;
- defm VAMOXOREI64 : VAMO<AMOOPVamoXor, LSWidth64, "vamoxorei64.v">;
- defm VAMOANDEI64 : VAMO<AMOOPVamoAnd, LSWidth64, "vamoandei64.v">;
- defm VAMOOREI64 : VAMO<AMOOPVamoOr, LSWidth64, "vamoorei64.v">;
- defm VAMOMINEI64 : VAMO<AMOOPVamoMin, LSWidth64, "vamominei64.v">;
- defm VAMOMAXEI64 : VAMO<AMOOPVamoMax, LSWidth64, "vamomaxei64.v">;
- defm VAMOMINUEI64 : VAMO<AMOOPVamoMinu, LSWidth64, "vamominuei64.v">;
- defm VAMOMAXUEI64 : VAMO<AMOOPVamoMaxu, LSWidth64, "vamomaxuei64.v">;
-} // Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64]
-
include "RISCVInstrInfoVPseudos.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index a82e333e6bab..073fa605e0fb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1124,68 +1124,6 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-class VPseudoAMOWDNoMask<VReg RetClass,
- VReg Op1Class> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
- (ins GPR:$rs1,
- Op1Class:$vs2,
- GetVRegNoV0<RetClass>.R:$vd,
- AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1;
- let Constraints = "$vd_wd = $vd";
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
-}
-
-class VPseudoAMOWDMask<VReg RetClass,
- VReg Op1Class> :
- Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
- (ins GPR:$rs1,
- Op1Class:$vs2,
- GetVRegNoV0<RetClass>.R:$vd,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1;
- let Constraints = "$vd_wd = $vd";
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
-}
-
-multiclass VPseudoAMOEI<int eew> {
- // Standard scalar AMO supports 32, 64, and 128 Mem data bits,
- // and in the base vector "V" extension, only SEW up to ELEN = max(XLEN, FLEN)
- // are required to be supported.
- // therefore only [32, 64] is allowed here.
- foreach sew = [32, 64] in {
- foreach lmul = MxSet<sew>.m in {
- defvar octuple_lmul = lmul.octuple;
- // Calculate emul = eew * lmul / sew
- defvar octuple_emul = !srl(!mul(eew, octuple_lmul), log2<sew>.val);
- if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
- defvar emulMX = octuple_to_str<octuple_emul>.ret;
- defvar emul= !cast<LMULInfo>("V_" # emulMX);
- let VLMul = lmul.value in {
- def "_WD_" # lmul.MX # "_" # emulMX : VPseudoAMOWDNoMask<lmul.vrclass, emul.vrclass>;
- def "_WD_" # lmul.MX # "_" # emulMX # "_MASK" : VPseudoAMOWDMask<lmul.vrclass, emul.vrclass>;
- }
- }
- }
- }
-}
-
-multiclass VPseudoAMO {
- foreach eew = EEWList in
- defm "EI" # eew : VPseudoAMOEI<eew>;
-}
-
class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
@@ -1376,17 +1314,35 @@ class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-multiclass VPseudoUSLoad<bit isFF> {
+multiclass VPseudoUSLoad {
foreach eew = EEWList in {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
- defvar FFStr = !if(isFF, "FF", "");
let VLMul = lmul.value in {
- def "E" # eew # FFStr # "_V_" # LInfo :
- VPseudoUSLoadNoMask<vreg, eew, isFF>;
- def "E" # eew # FFStr # "_V_" # LInfo # "_MASK" :
- VPseudoUSLoadMask<vreg, eew, isFF>;
+ def "E" # eew # "_V_" # LInfo :
+ VPseudoUSLoadNoMask<vreg, eew, false>,
+ VLESched<eew>;
+ def "E" # eew # "_V_" # LInfo # "_MASK" :
+ VPseudoUSLoadMask<vreg, eew, false>,
+ VLESched<eew>;
+ }
+ }
+ }
+}
+
+multiclass VPseudoFFLoad {
+ foreach eew = EEWList in {
+ foreach lmul = MxSet<eew>.m in {
+ defvar LInfo = lmul.MX;
+ defvar vreg = lmul.vrclass;
+ let VLMul = lmul.value in {
+ def "E" # eew # "FF_V_" # LInfo :
+ VPseudoUSLoadNoMask<vreg, eew, true>,
+ VLFSched<eew>;
+ def "E" # eew # "FF_V_" # LInfo # "_MASK" :
+ VPseudoUSLoadMask<vreg, eew, true>,
+ VLFSched<eew>;
}
}
}
@@ -1406,8 +1362,10 @@ multiclass VPseudoSLoad {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
let VLMul = lmul.value in {
- def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask<vreg, eew>;
- def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg, eew>;
+ def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask<vreg, eew>,
+ VLSSched<eew>;
+ def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg, eew>,
+ VLSSched<eew>;
}
}
}
@@ -1427,11 +1385,14 @@ multiclass VPseudoILoad<bit Ordered> {
defvar Vreg = lmul.vrclass;
defvar IdxVreg = idx_lmul.vrclass;
defvar HasConstraint = !ne(sew, eew);
+ defvar Order = !if(Ordered, "O", "U");
let VLMul = lmul.value in {
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
- VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>;
+ VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
+ VLXSched<eew, Order>;
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
- VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>;
+ VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
+ VLXSched<eew, Order>;
}
}
}
@@ -1445,8 +1406,10 @@ multiclass VPseudoUSStore {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
let VLMul = lmul.value in {
- def "E" # eew # "_V_" # LInfo : VPseudoUSStoreNoMask<vreg, eew>;
- def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg, eew>;
+ def "E" # eew # "_V_" # LInfo : VPseudoUSStoreNoMask<vreg, eew>,
+ VSESched<eew>;
+ def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg, eew>,
+ VSESched<eew>;
}
}
}
@@ -1466,8 +1429,10 @@ multiclass VPseudoSStore {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
let VLMul = lmul.value in {
- def "E" # eew # "_V_" # LInfo : VPseudoSStoreNoMask<vreg, eew>;
- def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg, eew>;
+ def "E" # eew # "_V_" # LInfo : VPseudoSStoreNoMask<vreg, eew>,
+ VSSSched<eew>;
+ def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg, eew>,
+ VSSSched<eew>;
}
}
}
@@ -1486,11 +1451,14 @@ multiclass VPseudoIStore<bit Ordered> {
defvar idx_lmul = !cast<LMULInfo>("V_" # IdxLInfo);
defvar Vreg = lmul.vrclass;
defvar IdxVreg = idx_lmul.vrclass;
+ defvar Order = !if(Ordered, "O", "U");
let VLMul = lmul.value in {
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
- VPseudoIStoreNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
+ VPseudoIStoreNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>,
+ VSXSched<eew, Order>;
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
- VPseudoIStoreMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
+ VPseudoIStoreMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>,
+ VSXSched<eew, Order>;
}
}
}
@@ -1498,32 +1466,50 @@ multiclass VPseudoIStore<bit Ordered> {
}
}
-multiclass VPseudoUnaryS_M {
+multiclass VPseudoVPOP_M {
foreach mti = AllMasks in
{
let VLMul = mti.LMul.value in {
- def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>;
- def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask;
+ def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>,
+ Sched<[WriteVMPopV, ReadVMPopV, ReadVMPopV]>;
+ def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask,
+ Sched<[WriteVMPopV, ReadVMPopV, ReadVMPopV]>;
}
}
}
-multiclass VPseudoUnaryM_M {
+multiclass VPseudoV1ST_M {
+ foreach mti = AllMasks in
+ {
+ let VLMul = mti.LMul.value in {
+ def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>,
+ Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMFFSV]>;
+ def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask,
+ Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMFFSV]>;
+ }
+ }
+}
+
+multiclass VPseudoVSFS_M {
defvar constraint = "@earlyclobber $rd";
foreach mti = AllMasks in
{
let VLMul = mti.LMul.value in {
- def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>;
- def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>;
+ def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>,
+ Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>;
+ def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>,
+ Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>;
}
}
}
-multiclass VPseudoMaskNullaryV {
+multiclass VPseudoVID_V {
foreach m = MxList.m in {
let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>;
- def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>;
+ def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>,
+ Sched<[WriteVMIdxV, ReadVMask]>;
+ def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>,
+ Sched<[WriteVMIdxV, ReadVMask]>;
}
}
}
@@ -1536,20 +1522,23 @@ multiclass VPseudoNullaryPseudoM <string BaseInst> {
}
}
-multiclass VPseudoUnaryV_M {
+multiclass VPseudoVIOT_M {
defvar constraint = "@earlyclobber $rd";
foreach m = MxList.m in {
let VLMul = m.value in {
- def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>;
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
+ Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
+ Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
}
}
}
-multiclass VPseudoUnaryV_V_AnyMask {
+multiclass VPseudoVCPR_V {
foreach m = MxList.m in {
let VLMul = m.value in
- def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>;
+ def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
+ Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>;
}
}
@@ -1611,7 +1600,7 @@ multiclass VPseudoBinaryV_VV<string Constraint = ""> {
defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
}
-multiclass VPseudoBinaryV_VV_EEW<int eew, string Constraint = ""> {
+multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> {
foreach m = MxList.m in {
foreach sew = EEWList in {
defvar octuple_lmul = m.octuple;
@@ -1620,7 +1609,8 @@ multiclass VPseudoBinaryV_VV_EEW<int eew, string Constraint = ""> {
if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
defvar emulMX = octuple_to_str<octuple_emul>.ret;
defvar emul = !cast<LMULInfo>("V_" # emulMX);
- defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint>;
+ defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint>,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>;
}
}
}
@@ -1631,6 +1621,12 @@ multiclass VPseudoBinaryV_VX<string Constraint = ""> {
defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>;
}
+multiclass VPseudoVSLD1_VX<string Constraint = ""> {
+ foreach m = MxList.m in
+ defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>,
+ Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+}
+
multiclass VPseudoBinaryV_VF<string Constraint = ""> {
foreach m = MxList.m in
foreach f = FPList.fpinfo in
@@ -1638,15 +1634,24 @@ multiclass VPseudoBinaryV_VF<string Constraint = ""> {
f.fprclass, m, Constraint>;
}
+multiclass VPseudoVSLD1_VF<string Constraint = ""> {
+ foreach m = MxList.m in
+ foreach f = FPList.fpinfo in
+ defm "_V" # f.FX :
+ VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>,
+ Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>;
+}
+
multiclass VPseudoBinaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList.m in
defm _VI : VPseudoBinary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
-multiclass VPseudoBinaryM_MM {
+multiclass VPseudoVALU_MM {
foreach m = MxList.m in
let VLMul = m.value in {
- def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">;
+ def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">,
+ Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
}
}
@@ -1744,12 +1749,13 @@ multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, GPR, m, CarryIn, Constraint>;
}
-multiclass VPseudoBinaryV_FM {
+multiclass VPseudoVMRG_FM {
foreach m = MxList.m in
foreach f = FPList.fpinfo in
def "_V" # f.FX # "M_" # m.MX :
VPseudoBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">;
+ m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">,
+ Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
}
multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
@@ -1762,76 +1768,102 @@ multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, simm5, m, CarryIn, Constraint>;
}
-multiclass VPseudoUnaryV_V_X_I_NoDummyMask {
+multiclass VPseudoUnaryVMV_V_X_I {
foreach m = MxList.m in {
let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>;
- def "_X_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, GPR>;
- def "_I_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, simm5>;
+ def "_V_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>,
+ Sched<[WriteVIMovV, ReadVIMovV]>;
+ def "_X_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, GPR>,
+ Sched<[WriteVIMovX, ReadVIMovX]>;
+ def "_I_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, simm5>,
+ Sched<[WriteVIMovI]>;
}
}
}
-multiclass VPseudoUnaryV_F_NoDummyMask {
+multiclass VPseudoVMV_F {
foreach m = MxList.m in {
foreach f = FPList.fpinfo in {
let VLMul = m.value in {
- def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>;
+ def "_" # f.FX # "_" # m.MX :
+ VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>,
+ Sched<[WriteVFMovV, ReadVFMovF]>;
}
}
}
}
-multiclass VPseudoUnaryTAV_V {
+multiclass VPseudoVCLS_V {
foreach m = MxList.m in {
let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
- def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>;
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
}
}
}
-multiclass VPseudoUnaryV_V {
+multiclass VPseudoVSQR_V {
foreach m = MxList.m in {
let VLMul = m.value in {
- def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
- def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>;
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
+ Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
}
}
}
-multiclass PseudoUnaryV_VF2 {
+multiclass VPseudoVRCP_V {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
+ Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
+ }
+ }
+}
+
+multiclass PseudoVEXT_VF2 {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF2.m in
{
let VLMul = m.value in {
- def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass,
- constraints>;
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
+ def "_" # m.MX # "_MASK" :
+ VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
}
}
-multiclass PseudoUnaryV_VF4 {
+multiclass PseudoVEXT_VF4 {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF4.m in
{
let VLMul = m.value in {
- def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass,
- constraints>;
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
+ def "_" # m.MX # "_MASK" :
+ VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
}
}
-multiclass PseudoUnaryV_VF8 {
+multiclass PseudoVEXT_VF8 {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF8.m in
{
let VLMul = m.value in {
- def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass,
- constraints>;
+ def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
+ def "_" # m.MX # "_MASK" :
+ VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
}
}
@@ -1874,30 +1906,172 @@ multiclass VPseudoBinaryM_VI {
!if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
}
-multiclass VPseudoBinaryV_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
- defm "" : VPseudoBinaryV_VV<Constraint>;
- defm "" : VPseudoBinaryV_VX<Constraint>;
- defm "" : VPseudoBinaryV_VI<ImmType, Constraint>;
+multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoBinaryV_VV<Constraint>,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<Constraint>,
+ Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VI<ImmType, Constraint>,
+ Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>;
}
-multiclass VPseudoBinaryV_VV_VX {
- defm "" : VPseudoBinaryV_VV;
- defm "" : VPseudoBinaryV_VX;
+multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoBinaryV_VV<Constraint>,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<Constraint>,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VI<ImmType, Constraint>,
+ Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>;
}
-multiclass VPseudoBinaryV_VV_VF {
- defm "" : VPseudoBinaryV_VV;
- defm "" : VPseudoBinaryV_VF;
+
+multiclass VPseudoVSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoBinaryV_VV<Constraint>,
+ Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<Constraint>,
+ Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VI<ImmType, Constraint>,
+ Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>;
}
-multiclass VPseudoBinaryV_VX_VI<Operand ImmType = simm5> {
- defm "" : VPseudoBinaryV_VX;
- defm "" : VPseudoBinaryV_VI<ImmType>;
+multiclass VPseudoVSSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoBinaryV_VV<Constraint>,
+ Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<Constraint>,
+ Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VI<ImmType, Constraint>,
+ Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>;
}
-multiclass VPseudoBinaryW_VV_VX {
- defm "" : VPseudoBinaryW_VV;
- defm "" : VPseudoBinaryW_VX;
+multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoBinaryV_VV<Constraint>,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<Constraint>,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VI<ImmType, Constraint>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
+}
+
+multiclass VPseudoVSALU_VV_VX {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+}
+
+multiclass VPseudoVSMUL_VV_VX {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>;
+}
+
+multiclass VPseudoVAALU_VV_VX {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>;
+}
+
+multiclass VPseudoVMINMAX_VV_VX {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+}
+
+multiclass VPseudoVMUL_VV_VX {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>;
+}
+
+multiclass VPseudoVDIV_VV_VX {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>;
+}
+
+multiclass VPseudoVFMUL_VV_VF {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VF,
+ Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>;
+}
+
+multiclass VPseudoVFDIV_VV_VF {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VF,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
+}
+
+multiclass VPseudoVFRDIV_VF {
+ defm "" : VPseudoBinaryV_VF,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
+}
+
+multiclass VPseudoVALU_VV_VX {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+}
+
+multiclass VPseudoVSGNJ_VV_VF {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VF,
+ Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>;
+}
+
+multiclass VPseudoVMAX_VV_VF {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VF,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VPseudoVALU_VV_VF {
+ defm "" : VPseudoBinaryV_VV,
+ Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VF,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
+}
+
+multiclass VPseudoVALU_VF {
+ defm "" : VPseudoBinaryV_VF,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
+}
+
+multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
+ defm "" : VPseudoBinaryV_VX,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VI<ImmType>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
+}
+
+multiclass VPseudoVWALU_VV_VX {
+ defm "" : VPseudoBinaryW_VV,
+ Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryW_VX,
+ Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>;
+}
+
+multiclass VPseudoVWMUL_VV_VX {
+ defm "" : VPseudoBinaryW_VV,
+ Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>;
+ defm "" : VPseudoBinaryW_VX,
+ Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>;
+}
+
+multiclass VPseudoVWMUL_VV_VF {
+ defm "" : VPseudoBinaryW_VV,
+ Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>;
+ defm "" : VPseudoBinaryW_VF,
+ Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>;
}
multiclass VPseudoBinaryW_VV_VF {
@@ -1905,53 +2079,100 @@ multiclass VPseudoBinaryW_VV_VF {
defm "" : VPseudoBinaryW_VF;
}
-multiclass VPseudoBinaryW_WV_WX {
- defm "" : VPseudoBinaryW_WV;
- defm "" : VPseudoBinaryW_WX;
+multiclass VPseudoVWALU_WV_WX {
+ defm "" : VPseudoBinaryW_WV,
+ Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryW_WX,
+ Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>;
+}
+
+multiclass VPseudoVFWALU_VV_VF {
+ defm "" : VPseudoBinaryW_VV,
+ Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryW_VF,
+ Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
+}
+
+multiclass VPseudoVFWALU_WV_WF {
+ defm "" : VPseudoBinaryW_WV,
+ Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
+ defm "" : VPseudoBinaryW_WF,
+ Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
+}
+
+multiclass VPseudoVMRG_VM_XM_IM {
+ defm "" : VPseudoBinaryV_VM,
+ Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_XM,
+ Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_IM,
+ Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
}
-multiclass VPseudoBinaryW_WV_WF {
- defm "" : VPseudoBinaryW_WV;
- defm "" : VPseudoBinaryW_WF;
+multiclass VPseudoVCALU_VM_XM_IM {
+ defm "" : VPseudoBinaryV_VM,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_XM,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_IM,
+ Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
}
-multiclass VPseudoBinaryV_VM_XM_IM {
- defm "" : VPseudoBinaryV_VM;
- defm "" : VPseudoBinaryV_XM;
- defm "" : VPseudoBinaryV_IM;
+multiclass VPseudoVCALU_VM_XM {
+ defm "" : VPseudoBinaryV_VM,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_XM,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
}
-multiclass VPseudoBinaryV_VM_XM {
- defm "" : VPseudoBinaryV_VM;
- defm "" : VPseudoBinaryV_XM;
+multiclass VPseudoVCALUM_VM_XM_IM<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
}
-multiclass VPseudoBinaryM_VM_XM_IM<string Constraint> {
- defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
- defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
- defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+multiclass VPseudoVCALUM_VM_XM<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
}
-multiclass VPseudoBinaryM_VM_XM<string Constraint> {
- defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
- defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+multiclass VPseudoVCALUM_V_X_I<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
+ defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ Sched<[WriteVICALUI, ReadVIALUCV]>;
}
-multiclass VPseudoBinaryM_V_X_I<string Constraint> {
- defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
- defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
- defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+multiclass VPseudoVCALUM_V_X<string Constraint> {
+ defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
}
-multiclass VPseudoBinaryM_V_X<string Constraint> {
- defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
- defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+multiclass VPseudoVNCLP_WV_WX_WI {
+ defm "" : VPseudoBinaryV_WV,
+ Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_WX,
+ Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_WI,
+ Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>;
}
-multiclass VPseudoBinaryV_WV_WX_WI {
- defm "" : VPseudoBinaryV_WV;
- defm "" : VPseudoBinaryV_WX;
- defm "" : VPseudoBinaryV_WI;
+multiclass VPseudoVNSHT_WV_WX_WI {
+ defm "" : VPseudoBinaryV_WV,
+ Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>;
+ defm "" : VPseudoBinaryV_WX,
+ Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_WI,
+ Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>;
}
multiclass VPseudoTernary<VReg RetClass,
@@ -2031,55 +2252,113 @@ multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
defm _VI : VPseudoTernary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
-multiclass VPseudoTernaryV_VV_VX_AAXA<string Constraint = ""> {
- defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
- defm "" : VPseudoTernaryV_VX_AAXA<Constraint>;
+multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
+ defm "" : VPseudoTernaryV_VV_AAXA<Constraint>,
+ Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>;
+ defm "" : VPseudoTernaryV_VX_AAXA<Constraint>,
+ Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>;
}
-multiclass VPseudoTernaryV_VV_VF_AAXA<string Constraint = ""> {
- defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
- defm "" : VPseudoTernaryV_VF_AAXA<Constraint>;
+multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
+ defm "" : VPseudoTernaryV_VV_AAXA<Constraint>,
+ Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>;
+ defm "" : VPseudoTernaryV_VF_AAXA<Constraint>,
+ Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>;
}
-multiclass VPseudoTernaryV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
- defm "" : VPseudoTernaryV_VX<Constraint>;
- defm "" : VPseudoTernaryV_VI<ImmType, Constraint>;
+multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+ defm "" : VPseudoTernaryV_VX<Constraint>,
+ Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+ defm "" : VPseudoTernaryV_VI<ImmType, Constraint>,
+ Sched<[WriteVISlideI, ReadVISlideV, ReadVISlideV, ReadVMask]>;
}
-multiclass VPseudoTernaryW_VV_VX {
- defm "" : VPseudoTernaryW_VV;
- defm "" : VPseudoTernaryW_VX;
+multiclass VPseudoVWMAC_VV_VX {
+ defm "" : VPseudoTernaryW_VV,
+ Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>;
+ defm "" : VPseudoTernaryW_VX,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
}
-multiclass VPseudoTernaryW_VV_VF {
- defm "" : VPseudoTernaryW_VV;
- defm "" : VPseudoTernaryW_VF;
+multiclass VPseudoVWMAC_VX {
+ defm "" : VPseudoTernaryW_VX,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
}
-multiclass VPseudoBinaryM_VV_VX_VI {
- defm "" : VPseudoBinaryM_VV;
- defm "" : VPseudoBinaryM_VX;
- defm "" : VPseudoBinaryM_VI;
+multiclass VPseudoVWMAC_VV_VF {
+ defm "" : VPseudoTernaryW_VV,
+ Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>;
+ defm "" : VPseudoTernaryW_VF,
+ Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>;
}
-multiclass VPseudoBinaryM_VV_VX {
- defm "" : VPseudoBinaryM_VV;
- defm "" : VPseudoBinaryM_VX;
+multiclass VPseudoVCMPM_VV_VX_VI {
+ defm "" : VPseudoBinaryM_VV,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VX,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VI,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
}
-multiclass VPseudoBinaryM_VV_VF {
- defm "" : VPseudoBinaryM_VV;
- defm "" : VPseudoBinaryM_VF;
+multiclass VPseudoVCMPM_VV_VX {
+ defm "" : VPseudoBinaryM_VV,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VX,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
}
-multiclass VPseudoBinaryM_VX_VI {
- defm "" : VPseudoBinaryM_VX;
- defm "" : VPseudoBinaryM_VI;
+multiclass VPseudoVCMPM_VV_VF {
+ defm "" : VPseudoBinaryM_VV,
+ Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VF,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
}
-multiclass VPseudoReductionV_VS {
+multiclass VPseudoVCMPM_VF {
+ defm "" : VPseudoBinaryM_VF,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VPseudoVCMPM_VX_VI {
+ defm "" : VPseudoBinaryM_VX,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ defm "" : VPseudoBinaryM_VI,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VPseudoVRED_VS {
foreach m = MxList.m in {
- defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>;
+ defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+ Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV, ReadVIRedV, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVWRED_VS {
+ foreach m = MxList.m in {
+ defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+ Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVFRED_VS {
+ foreach m = MxList.m in {
+ defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+ Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV, ReadVFRedV, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVFREDO_VS {
+ foreach m = MxList.m in {
+ defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+ Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVMask]>;
+ }
+}
+
+multiclass VPseudoVFWRED_VS {
+ foreach m = MxList.m in {
+ defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+ Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVMask]>;
}
}
@@ -2094,9 +2373,16 @@ multiclass VPseudoConversion<VReg RetClass,
}
}
-multiclass VPseudoConversionV_V {
+multiclass VPseudoVCVTI_V {
+ foreach m = MxList.m in
+ defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
+ Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
+}
+
+multiclass VPseudoVCVTF_V {
foreach m = MxList.m in
- defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>;
+ defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
+ Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>;
}
multiclass VPseudoConversionW_V {
@@ -2105,10 +2391,46 @@ multiclass VPseudoConversionW_V {
defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>;
}
-multiclass VPseudoConversionV_W {
+multiclass VPseudoVWCVTI_V {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
+ Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>;
+}
+
+multiclass VPseudoVWCVTF_V {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
+ Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>;
+}
+
+multiclass VPseudoVWCVTD_V {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
+ Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>;
+}
+
+multiclass VPseudoVNCVTI_W {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
+ Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>;
+}
+
+multiclass VPseudoVNCVTF_W {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxList.m[0-5] in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
+ Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>;
+}
+
+multiclass VPseudoVNCVTD_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW.m in
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>;
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
+ Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>;
}
multiclass VPseudoUSSegLoad<bit isFF> {
@@ -2543,42 +2865,6 @@ class VPatTernaryMask<string intrinsic,
(mask_type V0),
GPR:$vl, sew)>;
-class VPatAMOWDNoMask<string intrinsic_name,
- string inst,
- ValueType result_type,
- ValueType op1_type,
- int sew,
- LMULInfo vlmul,
- LMULInfo emul,
- VReg op1_reg_class> :
- Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
- GPR:$rs1,
- (op1_type op1_reg_class:$vs2),
- (result_type vlmul.vrclass:$vd),
- VLOpFrag)),
- (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
- $rs1, $vs2, $vd,
- GPR:$vl, sew)>;
-
-class VPatAMOWDMask<string intrinsic_name,
- string inst,
- ValueType result_type,
- ValueType op1_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- LMULInfo emul,
- VReg op1_reg_class> :
- Pat<(result_type (!cast<Intrinsic>(intrinsic_name # "_mask")
- GPR:$rs1,
- (op1_type op1_reg_class:$vs2),
- (result_type vlmul.vrclass:$vd),
- (mask_type V0),
- VLOpFrag)),
- (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
- $rs1, $vs2, $vd,
- (mask_type V0), GPR:$vl, sew)>;
-
multiclass VPatUnaryS_M<string intrinsic_name,
string inst>
{
@@ -3416,44 +3702,6 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
}
}
-multiclass VPatAMOWD<string intrinsic,
- string inst,
- ValueType result_type,
- ValueType offset_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- LMULInfo emul,
- VReg op1_reg_class>
-{
- def : VPatAMOWDNoMask<intrinsic, inst, result_type, offset_type,
- sew, vlmul, emul, op1_reg_class>;
- def : VPatAMOWDMask<intrinsic, inst, result_type, offset_type,
- mask_type, sew, vlmul, emul, op1_reg_class>;
-}
-
-multiclass VPatAMOV_WD<string intrinsic,
- string inst,
- list<VTypeInfo> vtilist> {
- foreach eew = EEWList in {
- foreach vti = vtilist in {
- if !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)) then {
- defvar octuple_lmul = vti.LMul.octuple;
- // Calculate emul = eew * lmul / sew
- defvar octuple_emul = !srl(!mul(eew, octuple_lmul), vti.Log2SEW);
- if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
- defvar emulMX = octuple_to_str<octuple_emul>.ret;
- defvar offsetVti = !cast<VTypeInfo>("VI" # eew # emulMX);
- defvar inst_ei = inst # "EI" # eew;
- defm : VPatAMOWD<intrinsic, inst_ei,
- vti.Vector, offsetVti.Vector,
- vti.Mask, vti.Log2SEW, vti.LMul, offsetVti.LMul, offsetVti.RegClass>;
- }
- }
- }
- }
-}
-
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
@@ -3531,11 +3779,13 @@ def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei),
//===----------------------------------------------------------------------===//
// Pseudos Unit-Stride Loads and Stores
-defm PseudoVL : VPseudoUSLoad</*isFF=*/false>;
+defm PseudoVL : VPseudoUSLoad;
defm PseudoVS : VPseudoUSStore;
-defm PseudoVLM : VPseudoLoadMask;
-defm PseudoVSM : VPseudoStoreMask;
+defm PseudoVLM : VPseudoLoadMask,
+ Sched<[WriteVLDM, ReadVLDX]>;
+defm PseudoVSM : VPseudoStoreMask,
+ Sched<[WriteVSTM, ReadVSTX]>;
//===----------------------------------------------------------------------===//
// 7.5 Vector Strided Instructions
@@ -3561,7 +3811,7 @@ defm PseudoVSUX : VPseudoIStore</*Ordered=*/false>;
// vleff may update VL register
let hasSideEffects = 1, Defs = [VL] in
-defm PseudoVL : VPseudoUSLoad</*isFF=*/true>;
+defm PseudoVL : VPseudoFFLoad;
//===----------------------------------------------------------------------===//
// 7.8. Vector Load/Store Segment Instructions
@@ -3580,28 +3830,15 @@ let hasSideEffects = 1, Defs = [VL] in
defm PseudoVLSEG : VPseudoUSSegLoad</*isFF=*/true>;
//===----------------------------------------------------------------------===//
-// 8. Vector AMO Operations
-//===----------------------------------------------------------------------===//
-defm PseudoVAMOSWAP : VPseudoAMO;
-defm PseudoVAMOADD : VPseudoAMO;
-defm PseudoVAMOXOR : VPseudoAMO;
-defm PseudoVAMOAND : VPseudoAMO;
-defm PseudoVAMOOR : VPseudoAMO;
-defm PseudoVAMOMIN : VPseudoAMO;
-defm PseudoVAMOMAX : VPseudoAMO;
-defm PseudoVAMOMINU : VPseudoAMO;
-defm PseudoVAMOMAXU : VPseudoAMO;
-
-//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Instructions
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// 12.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVADD : VPseudoBinaryV_VV_VX_VI;
-defm PseudoVSUB : VPseudoBinaryV_VV_VX;
-defm PseudoVRSUB : VPseudoBinaryV_VX_VI;
+defm PseudoVADD : VPseudoVALU_VV_VX_VI;
+defm PseudoVSUB : VPseudoVALU_VV_VX;
+defm PseudoVRSUB : VPseudoVALU_VX_VI;
foreach vti = AllIntegerVectors in {
// Match vrsub with 2 vector operands to vsub.vv by swapping operands. This
@@ -3657,166 +3894,166 @@ foreach vti = AllIntegerVectors in {
//===----------------------------------------------------------------------===//
// 12.2. Vector Widening Integer Add/Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVWADDU : VPseudoBinaryW_VV_VX;
-defm PseudoVWSUBU : VPseudoBinaryW_VV_VX;
-defm PseudoVWADD : VPseudoBinaryW_VV_VX;
-defm PseudoVWSUB : VPseudoBinaryW_VV_VX;
-defm PseudoVWADDU : VPseudoBinaryW_WV_WX;
-defm PseudoVWSUBU : VPseudoBinaryW_WV_WX;
-defm PseudoVWADD : VPseudoBinaryW_WV_WX;
-defm PseudoVWSUB : VPseudoBinaryW_WV_WX;
+defm PseudoVWADDU : VPseudoVWALU_VV_VX;
+defm PseudoVWSUBU : VPseudoVWALU_VV_VX;
+defm PseudoVWADD : VPseudoVWALU_VV_VX;
+defm PseudoVWSUB : VPseudoVWALU_VV_VX;
+defm PseudoVWADDU : VPseudoVWALU_WV_WX;
+defm PseudoVWSUBU : VPseudoVWALU_WV_WX;
+defm PseudoVWADD : VPseudoVWALU_WV_WX;
+defm PseudoVWSUB : VPseudoVWALU_WV_WX;
//===----------------------------------------------------------------------===//
// 12.3. Vector Integer Extension
//===----------------------------------------------------------------------===//
-defm PseudoVZEXT_VF2 : PseudoUnaryV_VF2;
-defm PseudoVZEXT_VF4 : PseudoUnaryV_VF4;
-defm PseudoVZEXT_VF8 : PseudoUnaryV_VF8;
-defm PseudoVSEXT_VF2 : PseudoUnaryV_VF2;
-defm PseudoVSEXT_VF4 : PseudoUnaryV_VF4;
-defm PseudoVSEXT_VF8 : PseudoUnaryV_VF8;
+defm PseudoVZEXT_VF2 : PseudoVEXT_VF2;
+defm PseudoVZEXT_VF4 : PseudoVEXT_VF4;
+defm PseudoVZEXT_VF8 : PseudoVEXT_VF8;
+defm PseudoVSEXT_VF2 : PseudoVEXT_VF2;
+defm PseudoVSEXT_VF4 : PseudoVEXT_VF4;
+defm PseudoVSEXT_VF8 : PseudoVEXT_VF8;
//===----------------------------------------------------------------------===//
// 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVADC : VPseudoBinaryV_VM_XM_IM;
-defm PseudoVMADC : VPseudoBinaryM_VM_XM_IM<"@earlyclobber $rd">;
-defm PseudoVMADC : VPseudoBinaryM_V_X_I<"@earlyclobber $rd">;
+defm PseudoVADC : VPseudoVCALU_VM_XM_IM;
+defm PseudoVMADC : VPseudoVCALUM_VM_XM_IM<"@earlyclobber $rd">;
+defm PseudoVMADC : VPseudoVCALUM_V_X_I<"@earlyclobber $rd">;
-defm PseudoVSBC : VPseudoBinaryV_VM_XM;
-defm PseudoVMSBC : VPseudoBinaryM_VM_XM<"@earlyclobber $rd">;
-defm PseudoVMSBC : VPseudoBinaryM_V_X<"@earlyclobber $rd">;
+defm PseudoVSBC : VPseudoVCALU_VM_XM;
+defm PseudoVMSBC : VPseudoVCALUM_VM_XM<"@earlyclobber $rd">;
+defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 12.5. Vector Bitwise Logical Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVAND : VPseudoBinaryV_VV_VX_VI;
-defm PseudoVOR : VPseudoBinaryV_VV_VX_VI;
-defm PseudoVXOR : VPseudoBinaryV_VV_VX_VI;
+defm PseudoVAND : VPseudoVALU_VV_VX_VI;
+defm PseudoVOR : VPseudoVALU_VV_VX_VI;
+defm PseudoVXOR : VPseudoVALU_VV_VX_VI;
//===----------------------------------------------------------------------===//
// 12.6. Vector Single-Width Bit Shift Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVSLL : VPseudoBinaryV_VV_VX_VI<uimm5>;
-defm PseudoVSRL : VPseudoBinaryV_VV_VX_VI<uimm5>;
-defm PseudoVSRA : VPseudoBinaryV_VV_VX_VI<uimm5>;
+defm PseudoVSLL : VPseudoVSHT_VV_VX_VI<uimm5>;
+defm PseudoVSRL : VPseudoVSHT_VV_VX_VI<uimm5>;
+defm PseudoVSRA : VPseudoVSHT_VV_VX_VI<uimm5>;
//===----------------------------------------------------------------------===//
// 12.7. Vector Narrowing Integer Right Shift Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVNSRL : VPseudoBinaryV_WV_WX_WI;
-defm PseudoVNSRA : VPseudoBinaryV_WV_WX_WI;
+defm PseudoVNSRL : VPseudoVNSHT_WV_WX_WI;
+defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI;
//===----------------------------------------------------------------------===//
// 12.8. Vector Integer Comparison Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMSEQ : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSNE : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSLTU : VPseudoBinaryM_VV_VX;
-defm PseudoVMSLT : VPseudoBinaryM_VV_VX;
-defm PseudoVMSLEU : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSLE : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSGTU : VPseudoBinaryM_VX_VI;
-defm PseudoVMSGT : VPseudoBinaryM_VX_VI;
+defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSLTU : VPseudoVCMPM_VV_VX;
+defm PseudoVMSLT : VPseudoVCMPM_VV_VX;
+defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSLE : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSGTU : VPseudoVCMPM_VX_VI;
+defm PseudoVMSGT : VPseudoVCMPM_VX_VI;
//===----------------------------------------------------------------------===//
// 12.9. Vector Integer Min/Max Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMINU : VPseudoBinaryV_VV_VX;
-defm PseudoVMIN : VPseudoBinaryV_VV_VX;
-defm PseudoVMAXU : VPseudoBinaryV_VV_VX;
-defm PseudoVMAX : VPseudoBinaryV_VV_VX;
+defm PseudoVMINU : VPseudoVMINMAX_VV_VX;
+defm PseudoVMIN : VPseudoVMINMAX_VV_VX;
+defm PseudoVMAXU : VPseudoVMINMAX_VV_VX;
+defm PseudoVMAX : VPseudoVMINMAX_VV_VX;
//===----------------------------------------------------------------------===//
// 12.10. Vector Single-Width Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMUL : VPseudoBinaryV_VV_VX;
-defm PseudoVMULH : VPseudoBinaryV_VV_VX;
-defm PseudoVMULHU : VPseudoBinaryV_VV_VX;
-defm PseudoVMULHSU : VPseudoBinaryV_VV_VX;
+defm PseudoVMUL : VPseudoVMUL_VV_VX;
+defm PseudoVMULH : VPseudoVMUL_VV_VX;
+defm PseudoVMULHU : VPseudoVMUL_VV_VX;
+defm PseudoVMULHSU : VPseudoVMUL_VV_VX;
//===----------------------------------------------------------------------===//
// 12.11. Vector Integer Divide Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVDIVU : VPseudoBinaryV_VV_VX;
-defm PseudoVDIV : VPseudoBinaryV_VV_VX;
-defm PseudoVREMU : VPseudoBinaryV_VV_VX;
-defm PseudoVREM : VPseudoBinaryV_VV_VX;
+defm PseudoVDIVU : VPseudoVDIV_VV_VX;
+defm PseudoVDIV : VPseudoVDIV_VV_VX;
+defm PseudoVREMU : VPseudoVDIV_VV_VX;
+defm PseudoVREM : VPseudoVDIV_VV_VX;
//===----------------------------------------------------------------------===//
// 12.12. Vector Widening Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMUL : VPseudoBinaryW_VV_VX;
-defm PseudoVWMULU : VPseudoBinaryW_VV_VX;
-defm PseudoVWMULSU : VPseudoBinaryW_VV_VX;
+defm PseudoVWMUL : VPseudoVWMUL_VV_VX;
+defm PseudoVWMULU : VPseudoVWMUL_VV_VX;
+defm PseudoVWMULSU : VPseudoVWMUL_VV_VX;
//===----------------------------------------------------------------------===//
// 12.13. Vector Single-Width Integer Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMACC : VPseudoTernaryV_VV_VX_AAXA;
-defm PseudoVNMSAC : VPseudoTernaryV_VV_VX_AAXA;
-defm PseudoVMADD : VPseudoTernaryV_VV_VX_AAXA;
-defm PseudoVNMSUB : VPseudoTernaryV_VV_VX_AAXA;
+defm PseudoVMACC : VPseudoVMAC_VV_VX_AAXA;
+defm PseudoVNMSAC : VPseudoVMAC_VV_VX_AAXA;
+defm PseudoVMADD : VPseudoVMAC_VV_VX_AAXA;
+defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA;
//===----------------------------------------------------------------------===//
// 12.14. Vector Widening Integer Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMACCU : VPseudoTernaryW_VV_VX;
-defm PseudoVWMACC : VPseudoTernaryW_VV_VX;
-defm PseudoVWMACCSU : VPseudoTernaryW_VV_VX;
-defm PseudoVWMACCUS : VPseudoTernaryW_VX;
+defm PseudoVWMACCU : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACC : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACCUS : VPseudoVWMAC_VX;
//===----------------------------------------------------------------------===//
// 12.15. Vector Integer Merge Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMERGE : VPseudoBinaryV_VM_XM_IM;
+defm PseudoVMERGE : VPseudoVMRG_VM_XM_IM;
//===----------------------------------------------------------------------===//
// 12.16. Vector Integer Move Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMV_V : VPseudoUnaryV_V_X_I_NoDummyMask;
+defm PseudoVMV_V : VPseudoUnaryVMV_V_X_I;
//===----------------------------------------------------------------------===//
// 13.1. Vector Single-Width Saturating Add and Subtract
//===----------------------------------------------------------------------===//
let Defs = [VXSAT], hasSideEffects = 1 in {
- defm PseudoVSADDU : VPseudoBinaryV_VV_VX_VI;
- defm PseudoVSADD : VPseudoBinaryV_VV_VX_VI;
- defm PseudoVSSUBU : VPseudoBinaryV_VV_VX;
- defm PseudoVSSUB : VPseudoBinaryV_VV_VX;
+ defm PseudoVSADDU : VPseudoVSALU_VV_VX_VI;
+ defm PseudoVSADD : VPseudoVSALU_VV_VX_VI;
+ defm PseudoVSSUBU : VPseudoVSALU_VV_VX;
+ defm PseudoVSSUB : VPseudoVSALU_VV_VX;
}
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Averaging Add and Subtract
//===----------------------------------------------------------------------===//
let Uses = [VXRM], hasSideEffects = 1 in {
- defm PseudoVAADDU : VPseudoBinaryV_VV_VX;
- defm PseudoVAADD : VPseudoBinaryV_VV_VX;
- defm PseudoVASUBU : VPseudoBinaryV_VV_VX;
- defm PseudoVASUB : VPseudoBinaryV_VV_VX;
+ defm PseudoVAADDU : VPseudoVAALU_VV_VX;
+ defm PseudoVAADD : VPseudoVAALU_VV_VX;
+ defm PseudoVASUBU : VPseudoVAALU_VV_VX;
+ defm PseudoVASUB : VPseudoVAALU_VV_VX;
}
//===----------------------------------------------------------------------===//
// 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
//===----------------------------------------------------------------------===//
let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
- defm PseudoVSMUL : VPseudoBinaryV_VV_VX;
+ defm PseudoVSMUL : VPseudoVSMUL_VV_VX;
}
//===----------------------------------------------------------------------===//
// 13.4. Vector Single-Width Scaling Shift Instructions
//===----------------------------------------------------------------------===//
let Uses = [VXRM], hasSideEffects = 1 in {
- defm PseudoVSSRL : VPseudoBinaryV_VV_VX_VI<uimm5>;
- defm PseudoVSSRA : VPseudoBinaryV_VV_VX_VI<uimm5>;
+ defm PseudoVSSRL : VPseudoVSSHT_VV_VX_VI<uimm5>;
+ defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI<uimm5>;
}
//===----------------------------------------------------------------------===//
// 13.5. Vector Narrowing Fixed-Point Clip Instructions
//===----------------------------------------------------------------------===//
let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
- defm PseudoVNCLIP : VPseudoBinaryV_WV_WX_WI;
- defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI;
+ defm PseudoVNCLIP : VPseudoVNCLP_WV_WX_WI;
+ defm PseudoVNCLIPU : VPseudoVNCLP_WV_WX_WI;
}
} // Predicates = [HasVInstructions]
@@ -3825,156 +4062,156 @@ let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFADD : VPseudoBinaryV_VV_VF;
-defm PseudoVFSUB : VPseudoBinaryV_VV_VF;
-defm PseudoVFRSUB : VPseudoBinaryV_VF;
+defm PseudoVFADD : VPseudoVALU_VV_VF;
+defm PseudoVFSUB : VPseudoVALU_VV_VF;
+defm PseudoVFRSUB : VPseudoVALU_VF;
//===----------------------------------------------------------------------===//
// 14.3. Vector Widening Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFWADD : VPseudoBinaryW_VV_VF;
-defm PseudoVFWSUB : VPseudoBinaryW_VV_VF;
-defm PseudoVFWADD : VPseudoBinaryW_WV_WF;
-defm PseudoVFWSUB : VPseudoBinaryW_WV_WF;
+defm PseudoVFWADD : VPseudoVFWALU_VV_VF;
+defm PseudoVFWSUB : VPseudoVFWALU_VV_VF;
+defm PseudoVFWADD : VPseudoVFWALU_WV_WF;
+defm PseudoVFWSUB : VPseudoVFWALU_WV_WF;
//===----------------------------------------------------------------------===//
// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFMUL : VPseudoBinaryV_VV_VF;
-defm PseudoVFDIV : VPseudoBinaryV_VV_VF;
-defm PseudoVFRDIV : VPseudoBinaryV_VF;
+defm PseudoVFMUL : VPseudoVFMUL_VV_VF;
+defm PseudoVFDIV : VPseudoVFDIV_VV_VF;
+defm PseudoVFRDIV : VPseudoVFRDIV_VF;
//===----------------------------------------------------------------------===//
// 14.5. Vector Widening Floating-Point Multiply
//===----------------------------------------------------------------------===//
-defm PseudoVFWMUL : VPseudoBinaryW_VV_VF;
+defm PseudoVFWMUL : VPseudoVWMUL_VV_VF;
//===----------------------------------------------------------------------===//
// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFMACC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMACC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFMSAC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMSAC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFMADD : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMADD : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFMSUB : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMSUB : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFMACC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMACC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFMSAC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMSAC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFMADD : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMADD : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFMSUB : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMSUB : VPseudoVMAC_VV_VF_AAXA;
//===----------------------------------------------------------------------===//
// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFWMACC : VPseudoTernaryW_VV_VF;
-defm PseudoVFWNMACC : VPseudoTernaryW_VV_VF;
-defm PseudoVFWMSAC : VPseudoTernaryW_VV_VF;
-defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VF;
+defm PseudoVFWMACC : VPseudoVWMAC_VV_VF;
+defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF;
+defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF;
+defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF;
//===----------------------------------------------------------------------===//
// 14.8. Vector Floating-Point Square-Root Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFSQRT : VPseudoUnaryTAV_V;
+defm PseudoVFSQRT : VPseudoVSQR_V;
//===----------------------------------------------------------------------===//
// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFRSQRT7 : VPseudoUnaryTAV_V;
+defm PseudoVFRSQRT7 : VPseudoVRCP_V;
//===----------------------------------------------------------------------===//
// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFREC7 : VPseudoUnaryTAV_V;
+defm PseudoVFREC7 : VPseudoVRCP_V;
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFMIN : VPseudoBinaryV_VV_VF;
-defm PseudoVFMAX : VPseudoBinaryV_VV_VF;
+defm PseudoVFMIN : VPseudoVMAX_VV_VF;
+defm PseudoVFMAX : VPseudoVMAX_VV_VF;
//===----------------------------------------------------------------------===//
// 14.12. Vector Floating-Point Sign-Injection Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFSGNJ : VPseudoBinaryV_VV_VF;
-defm PseudoVFSGNJN : VPseudoBinaryV_VV_VF;
-defm PseudoVFSGNJX : VPseudoBinaryV_VV_VF;
+defm PseudoVFSGNJ : VPseudoVSGNJ_VV_VF;
+defm PseudoVFSGNJN : VPseudoVSGNJ_VV_VF;
+defm PseudoVFSGNJX : VPseudoVSGNJ_VV_VF;
//===----------------------------------------------------------------------===//
// 14.13. Vector Floating-Point Compare Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMFEQ : VPseudoBinaryM_VV_VF;
-defm PseudoVMFNE : VPseudoBinaryM_VV_VF;
-defm PseudoVMFLT : VPseudoBinaryM_VV_VF;
-defm PseudoVMFLE : VPseudoBinaryM_VV_VF;
-defm PseudoVMFGT : VPseudoBinaryM_VF;
-defm PseudoVMFGE : VPseudoBinaryM_VF;
+defm PseudoVMFEQ : VPseudoVCMPM_VV_VF;
+defm PseudoVMFNE : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLE : VPseudoVCMPM_VV_VF;
+defm PseudoVMFGT : VPseudoVCMPM_VF;
+defm PseudoVMFGE : VPseudoVCMPM_VF;
//===----------------------------------------------------------------------===//
// 14.14. Vector Floating-Point Classify Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFCLASS : VPseudoUnaryV_V;
+defm PseudoVFCLASS : VPseudoVCLS_V;
//===----------------------------------------------------------------------===//
// 14.15. Vector Floating-Point Merge Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFMERGE : VPseudoBinaryV_FM;
+defm PseudoVFMERGE : VPseudoVMRG_FM;
//===----------------------------------------------------------------------===//
// 14.16. Vector Floating-Point Move Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFMV_V : VPseudoUnaryV_F_NoDummyMask;
+defm PseudoVFMV_V : VPseudoVMV_F;
//===----------------------------------------------------------------------===//
// 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFCVT_XU_F : VPseudoConversionV_V;
-defm PseudoVFCVT_X_F : VPseudoConversionV_V;
-defm PseudoVFCVT_RTZ_XU_F : VPseudoConversionV_V;
-defm PseudoVFCVT_RTZ_X_F : VPseudoConversionV_V;
-defm PseudoVFCVT_F_XU : VPseudoConversionV_V;
-defm PseudoVFCVT_F_X : VPseudoConversionV_V;
+defm PseudoVFCVT_XU_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
+defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
//===----------------------------------------------------------------------===//
// 14.18. Widening Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFWCVT_XU_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_X_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_RTZ_XU_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_RTZ_X_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_F_XU : VPseudoConversionW_V;
-defm PseudoVFWCVT_F_X : VPseudoConversionW_V;
-defm PseudoVFWCVT_F_F : VPseudoConversionW_V;
+defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_RTZ_XU_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_RTZ_X_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V;
+defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V;
+defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
//===----------------------------------------------------------------------===//
// 14.19. Narrowing Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFNCVT_XU_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_X_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_RTZ_XU_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_RTZ_X_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_F_XU : VPseudoConversionV_W;
-defm PseudoVFNCVT_F_X : VPseudoConversionV_W;
-defm PseudoVFNCVT_F_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_ROD_F_F : VPseudoConversionV_W;
+defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_XU_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_X_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W;
+defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W;
+defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W;
+defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // Predicates = [HasVInstructionsAnyF]
let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 15.1. Vector Single-Width Integer Reduction Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVREDSUM : VPseudoReductionV_VS;
-defm PseudoVREDAND : VPseudoReductionV_VS;
-defm PseudoVREDOR : VPseudoReductionV_VS;
-defm PseudoVREDXOR : VPseudoReductionV_VS;
-defm PseudoVREDMINU : VPseudoReductionV_VS;
-defm PseudoVREDMIN : VPseudoReductionV_VS;
-defm PseudoVREDMAXU : VPseudoReductionV_VS;
-defm PseudoVREDMAX : VPseudoReductionV_VS;
+defm PseudoVREDSUM : VPseudoVRED_VS;
+defm PseudoVREDAND : VPseudoVRED_VS;
+defm PseudoVREDOR : VPseudoVRED_VS;
+defm PseudoVREDXOR : VPseudoVRED_VS;
+defm PseudoVREDMINU : VPseudoVRED_VS;
+defm PseudoVREDMIN : VPseudoVRED_VS;
+defm PseudoVREDMAXU : VPseudoVRED_VS;
+defm PseudoVREDMAX : VPseudoVRED_VS;
//===----------------------------------------------------------------------===//
// 15.2. Vector Widening Integer Reduction Instructions
//===----------------------------------------------------------------------===//
let IsRVVWideningReduction = 1 in {
-defm PseudoVWREDSUMU : VPseudoReductionV_VS;
-defm PseudoVWREDSUM : VPseudoReductionV_VS;
+defm PseudoVWREDSUMU : VPseudoVWRED_VS;
+defm PseudoVWREDSUM : VPseudoVWRED_VS;
}
} // Predicates = [HasVInstructions]
@@ -3982,17 +4219,17 @@ let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFREDOSUM : VPseudoReductionV_VS;
-defm PseudoVFREDUSUM : VPseudoReductionV_VS;
-defm PseudoVFREDMIN : VPseudoReductionV_VS;
-defm PseudoVFREDMAX : VPseudoReductionV_VS;
+defm PseudoVFREDOSUM : VPseudoVFREDO_VS;
+defm PseudoVFREDUSUM : VPseudoVFRED_VS;
+defm PseudoVFREDMIN : VPseudoVFRED_VS;
+defm PseudoVFREDMAX : VPseudoVFRED_VS;
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
let IsRVVWideningReduction = 1 in {
-defm PseudoVFWREDUSUM : VPseudoReductionV_VS;
-defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
+defm PseudoVFWREDUSUM : VPseudoVFWRED_VS;
+defm PseudoVFWREDOSUM : VPseudoVFWRED_VS;
}
} // Predicates = [HasVInstructionsAnyF]
@@ -4005,55 +4242,57 @@ defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
// 16.1 Vector Mask-Register Logical Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMAND: VPseudoBinaryM_MM;
-defm PseudoVMNAND: VPseudoBinaryM_MM;
-defm PseudoVMANDN: VPseudoBinaryM_MM;
-defm PseudoVMXOR: VPseudoBinaryM_MM;
-defm PseudoVMOR: VPseudoBinaryM_MM;
-defm PseudoVMNOR: VPseudoBinaryM_MM;
-defm PseudoVMORN: VPseudoBinaryM_MM;
-defm PseudoVMXNOR: VPseudoBinaryM_MM;
+defm PseudoVMAND: VPseudoVALU_MM;
+defm PseudoVMNAND: VPseudoVALU_MM;
+defm PseudoVMANDN: VPseudoVALU_MM;
+defm PseudoVMXOR: VPseudoVALU_MM;
+defm PseudoVMOR: VPseudoVALU_MM;
+defm PseudoVMNOR: VPseudoVALU_MM;
+defm PseudoVMORN: VPseudoVALU_MM;
+defm PseudoVMXNOR: VPseudoVALU_MM;
// Pseudo instructions
-defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
-defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">;
+defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">,
+ Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
+defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">,
+ Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
//===----------------------------------------------------------------------===//
// 16.2. Vector mask population count vcpop
//===----------------------------------------------------------------------===//
-defm PseudoVCPOP: VPseudoUnaryS_M;
+defm PseudoVCPOP: VPseudoVPOP_M;
//===----------------------------------------------------------------------===//
// 16.3. vfirst find-first-set mask bit
//===----------------------------------------------------------------------===//
-defm PseudoVFIRST: VPseudoUnaryS_M;
+defm PseudoVFIRST: VPseudoV1ST_M;
//===----------------------------------------------------------------------===//
// 16.4. vmsbf.m set-before-first mask bit
//===----------------------------------------------------------------------===//
-defm PseudoVMSBF: VPseudoUnaryM_M;
+defm PseudoVMSBF: VPseudoVSFS_M;
//===----------------------------------------------------------------------===//
// 16.5. vmsif.m set-including-first mask bit
//===----------------------------------------------------------------------===//
-defm PseudoVMSIF: VPseudoUnaryM_M;
+defm PseudoVMSIF: VPseudoVSFS_M;
//===----------------------------------------------------------------------===//
// 16.6. vmsof.m set-only-first mask bit
//===----------------------------------------------------------------------===//
-defm PseudoVMSOF: VPseudoUnaryM_M;
+defm PseudoVMSOF: VPseudoVSFS_M;
//===----------------------------------------------------------------------===//
// 16.8. Vector Iota Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVIOTA_M: VPseudoUnaryV_M;
+defm PseudoVIOTA_M: VPseudoVIOT_M;
//===----------------------------------------------------------------------===//
// 16.9. Vector Element Index Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVID : VPseudoMaskNullaryV;
+defm PseudoVID : VPseudoVID_V;
//===----------------------------------------------------------------------===//
// 17. Vector Permutation Instructions
@@ -4068,15 +4307,18 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList.m in {
let VLMul = m.value in {
let HasSEWOp = 1, BaseInstr = VMV_X_S in
- def PseudoVMV_X_S # "_" # m.MX: Pseudo<(outs GPR:$rd),
- (ins m.vrclass:$rs2, ixlenimm:$sew),
- []>, RISCVVPseudo;
+ def PseudoVMV_X_S # "_" # m.MX:
+ Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, ixlenimm:$sew), []>,
+ Sched<[WriteVIMovVX, ReadVIMovVX]>,
+ RISCVVPseudo;
let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X,
Constraints = "$rd = $rs1" in
def PseudoVMV_S_X # "_" # m.MX: Pseudo<(outs m.vrclass:$rd),
(ins m.vrclass:$rs1, GPR:$rs2,
AVL:$vl, ixlenimm:$sew),
- []>, RISCVVPseudo;
+ []>,
+ Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>,
+ RISCVVPseudo;
}
}
}
@@ -4093,17 +4335,19 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
let VLMul = m.value in {
let HasSEWOp = 1, BaseInstr = VFMV_F_S in
def "PseudoVFMV_" # f.FX # "_S_" # m.MX :
- Pseudo<(outs f.fprclass:$rd),
- (ins m.vrclass:$rs2,
- ixlenimm:$sew),
- []>, RISCVVPseudo;
+ Pseudo<(outs f.fprclass:$rd),
+ (ins m.vrclass:$rs2, ixlenimm:$sew), []>,
+ Sched<[WriteVFMovVF, ReadVFMovVF]>,
+ RISCVVPseudo;
let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F,
Constraints = "$rd = $rs1" in
def "PseudoVFMV_S_" # f.FX # "_" # m.MX :
Pseudo<(outs m.vrclass:$rd),
(ins m.vrclass:$rs1, f.fprclass:$rs2,
AVL:$vl, ixlenimm:$sew),
- []>, RISCVVPseudo;
+ []>,
+ Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>,
+ RISCVVPseudo;
}
}
}
@@ -4114,52 +4358,33 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// 17.3. Vector Slide Instructions
//===----------------------------------------------------------------------===//
let Predicates = [HasVInstructions] in {
- defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI<uimm5, "@earlyclobber $rd">;
- defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI<uimm5>;
- defm PseudoVSLIDE1UP : VPseudoBinaryV_VX<"@earlyclobber $rd">;
- defm PseudoVSLIDE1DOWN : VPseudoBinaryV_VX;
+ defm PseudoVSLIDEUP : VPseudoVSLD_VX_VI<uimm5, "@earlyclobber $rd">;
+ defm PseudoVSLIDEDOWN : VPseudoVSLD_VX_VI<uimm5>;
+ defm PseudoVSLIDE1UP : VPseudoVSLD1_VX<"@earlyclobber $rd">;
+ defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX;
} // Predicates = [HasVInstructions]
let Predicates = [HasVInstructionsAnyF] in {
- defm PseudoVFSLIDE1UP : VPseudoBinaryV_VF<"@earlyclobber $rd">;
- defm PseudoVFSLIDE1DOWN : VPseudoBinaryV_VF;
+ defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">;
+ defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF;
} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.4. Vector Register Gather Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVRGATHER : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">;
-defm PseudoVRGATHEREI16 : VPseudoBinaryV_VV_EEW</* eew */ 16, "@earlyclobber $rd">;
+defm PseudoVRGATHER : VPseudoVGTR_VV_VX_VI<uimm5, "@earlyclobber $rd">;
+defm PseudoVRGATHEREI16 : VPseudoVGTR_VV_EEW</* eew */ 16, "@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 17.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask;
+defm PseudoVCOMPRESS : VPseudoVCPR_V;
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// 8. Vector AMO Operations
-//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZvamo] in {
- defm : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamoadd", "PseudoVAMOADD", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamoxor", "PseudoVAMOXOR", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamoand", "PseudoVAMOAND", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamoor", "PseudoVAMOOR", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamomin", "PseudoVAMOMIN", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamomax", "PseudoVAMOMAX", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamominu", "PseudoVAMOMINU", AllIntegerVectors>;
- defm : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>;
-} // Predicates = [HasStdExtZvamo]
-
-let Predicates = [HasStdExtZvamo, HasVInstructionsAnyF] in {
- defm : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>;
-} // Predicates = [HasStdExtZvamo, HasVInstructionsAnyF]
-
-//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 461bdd348934..7eb8ae7d4193 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -382,50 +382,50 @@ def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri",
} // Predicates = [HasStdExtZbt]
let Predicates = [HasStdExtZbb] in {
-def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0010011>, "clz">,
+def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">,
Sched<[WriteCLZ, ReadCLZ]>;
-def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0010011>, "ctz">,
+def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM, "ctz">,
Sched<[WriteCTZ, ReadCTZ]>;
-def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0010011>, "cpop">,
+def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">,
Sched<[WriteCPOP, ReadCPOP]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbm, IsRV64] in
-def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, RISCVOpcode<0b0010011>,
- "bmatflip">, Sched<[]>;
+def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">,
+ Sched<[]>;
let Predicates = [HasStdExtZbb] in {
-def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, RISCVOpcode<0b0010011>,
- "sext.b">, Sched<[WriteIALU, ReadIALU]>;
-def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, RISCVOpcode<0b0010011>,
- "sext.h">, Sched<[WriteIALU, ReadIALU]>;
+def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
+ Sched<[WriteIALU, ReadIALU]>;
+def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbr] in {
-def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, RISCVOpcode<0b0010011>,
- "crc32.b">, Sched<[]>;
-def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, RISCVOpcode<0b0010011>,
- "crc32.h">, Sched<[]>;
-def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, RISCVOpcode<0b0010011>,
- "crc32.w">, Sched<[]>;
+def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">,
+ Sched<[]>;
+def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">,
+ Sched<[]>;
+def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">,
+ Sched<[]>;
} // Predicates = [HasStdExtZbr]
let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, RISCVOpcode<0b0010011>,
- "crc32.d">, Sched<[]>;
+def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">,
+ Sched<[]>;
let Predicates = [HasStdExtZbr] in {
-def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, RISCVOpcode<0b0010011>,
- "crc32c.b">, Sched<[]>;
-def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, RISCVOpcode<0b0010011>,
- "crc32c.h">, Sched<[]>;
-def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, RISCVOpcode<0b0010011>,
- "crc32c.w">, Sched<[]>;
+def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">,
+ Sched<[]>;
+def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">,
+ Sched<[]>;
+def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">,
+ Sched<[]>;
} // Predicates = [HasStdExtZbr]
let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, RISCVOpcode<0b0010011>,
- "crc32c.d">, Sched<[]>;
+def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
+ Sched<[]>;
let Predicates = [HasStdExtZbc] in {
def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">, Sched<[]>;
@@ -523,12 +523,12 @@ def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
} // Predicates = [HasStdExtZbt, IsRV64]
let Predicates = [HasStdExtZbb, IsRV64] in {
-def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0011011>,
- "clzw">, Sched<[WriteCLZ32, ReadCLZ32]>;
-def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0011011>,
- "ctzw">, Sched<[WriteCTZ32, ReadCTZ32]>;
-def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0011011>,
- "cpopw">, Sched<[WriteCPOP32, ReadCPOP32]>;
+def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
+ Sched<[WriteCLZ32, ReadCLZ32]>;
+def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
+ Sched<[WriteCTZ32, ReadCTZ32]>;
+def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
+ Sched<[WriteCPOP32, ReadCPOP32]>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbp, IsRV64] in {
@@ -791,6 +791,9 @@ def : Pat<(xor GPR:$rs1, BSETINVMask:$mask),
def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
(BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(and (not (srl GPR:$rs1, uimmlog2xlen:$shamt)), (XLenVT 1)),
+ (XORI (BEXTI GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1))>;
+
def : Pat<(or GPR:$r, BSETINVTwoBitsMask:$i),
(BSETI (BSETI GPR:$r, (TrailingZerosXForm BSETINVTwoBitsMask:$i)),
(BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index a33494461869..663e44813899 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -28,41 +28,6 @@ def riscv_fmv_x_anyexth
: SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_ANYEXTH>;
//===----------------------------------------------------------------------===//
-// Instruction class templates
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPFMAH_rrr_frm<RISCVOpcode opcode, string opcodestr>
- : RVInstR4Frm<0b10, opcode, (outs FPR16:$rd),
- (ins FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, frmarg:$funct3),
- opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">;
-
-class FPFMAHDynFrmAlias<FPFMAH_rrr_frm Inst, string OpcodeStr>
- : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
- (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUH_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
- : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR16:$rd),
- (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUH_rr_frm<bits<7> funct7, string opcodestr>
- : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR16:$rd),
- (ins FPR16:$rs1, FPR16:$rs2, frmarg:$funct3), opcodestr,
- "$rd, $rs1, $rs2, $funct3">;
-
-class FPALUHDynFrmAlias<FPALUH_rr_frm Inst, string OpcodeStr>
- : InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
- (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, 0b111)>;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPCmpH_rr<bits<3> funct3, string opcodestr>
- : RVInstR<0b1010010, funct3, OPC_OP_FP, (outs GPR:$rd),
- (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">,
- Sched<[WriteFCmp16, ReadFCmp16, ReadFCmp16]>;
-
-//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -84,145 +49,120 @@ def FSH : RVInstS<0b001, OPC_STORE_FP, (outs),
} // Predicates = [HasStdExtZfhmin]
let Predicates = [HasStdExtZfh] in {
-def FMADD_H : FPFMAH_rrr_frm<OPC_MADD, "fmadd.h">,
- Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def : FPFMAHDynFrmAlias<FMADD_H, "fmadd.h">;
-def FMSUB_H : FPFMAH_rrr_frm<OPC_MSUB, "fmsub.h">,
- Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def : FPFMAHDynFrmAlias<FMSUB_H, "fmsub.h">;
-def FNMSUB_H : FPFMAH_rrr_frm<OPC_NMSUB, "fnmsub.h">,
- Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def : FPFMAHDynFrmAlias<FNMSUB_H, "fnmsub.h">;
-def FNMADD_H : FPFMAH_rrr_frm<OPC_NMADD, "fnmadd.h">,
- Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def : FPFMAHDynFrmAlias<FNMADD_H, "fnmadd.h">;
+let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
+def FMADD_H : FPFMA_rrr_frm<OPC_MADD, 0b10, "fmadd.h", FPR16>;
+def FMSUB_H : FPFMA_rrr_frm<OPC_MSUB, 0b10, "fmsub.h", FPR16>;
+def FNMSUB_H : FPFMA_rrr_frm<OPC_NMSUB, 0b10, "fnmsub.h", FPR16>;
+def FNMADD_H : FPFMA_rrr_frm<OPC_NMADD, 0b10, "fnmadd.h", FPR16>;
+}
-def FADD_H : FPALUH_rr_frm<0b0000010, "fadd.h">,
+def : FPFMADynFrmAlias<FMADD_H, "fmadd.h", FPR16>;
+def : FPFMADynFrmAlias<FMSUB_H, "fmsub.h", FPR16>;
+def : FPFMADynFrmAlias<FNMSUB_H, "fnmsub.h", FPR16>;
+def : FPFMADynFrmAlias<FNMADD_H, "fnmadd.h", FPR16>;
+
+def FADD_H : FPALU_rr_frm<0b0000010, "fadd.h", FPR16>,
Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>;
-def : FPALUHDynFrmAlias<FADD_H, "fadd.h">;
-def FSUB_H : FPALUH_rr_frm<0b0000110, "fsub.h">,
+def FSUB_H : FPALU_rr_frm<0b0000110, "fsub.h", FPR16>,
Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>;
-def : FPALUHDynFrmAlias<FSUB_H, "fsub.h">;
-def FMUL_H : FPALUH_rr_frm<0b0001010, "fmul.h">,
+def FMUL_H : FPALU_rr_frm<0b0001010, "fmul.h", FPR16>,
Sched<[WriteFMul16, ReadFMul16, ReadFMul16]>;
-def : FPALUHDynFrmAlias<FMUL_H, "fmul.h">;
-def FDIV_H : FPALUH_rr_frm<0b0001110, "fdiv.h">,
+def FDIV_H : FPALU_rr_frm<0b0001110, "fdiv.h", FPR16>,
Sched<[WriteFDiv16, ReadFDiv16, ReadFDiv16]>;
-def : FPALUHDynFrmAlias<FDIV_H, "fdiv.h">;
-def FSQRT_H : FPUnaryOp_r_frm<0b0101110, FPR16, FPR16, "fsqrt.h">,
- Sched<[WriteFSqrt16, ReadFSqrt16]> {
- let rs2 = 0b00000;
-}
+def : FPALUDynFrmAlias<FADD_H, "fadd.h", FPR16>;
+def : FPALUDynFrmAlias<FSUB_H, "fsub.h", FPR16>;
+def : FPALUDynFrmAlias<FMUL_H, "fmul.h", FPR16>;
+def : FPALUDynFrmAlias<FDIV_H, "fdiv.h", FPR16>;
+
+def FSQRT_H : FPUnaryOp_r_frm<0b0101110, 0b00000, FPR16, FPR16, "fsqrt.h">,
+ Sched<[WriteFSqrt16, ReadFSqrt16]>;
def : FPUnaryOpDynFrmAlias<FSQRT_H, "fsqrt.h", FPR16, FPR16>;
-def FSGNJ_H : FPALUH_rr<0b0010010, 0b000, "fsgnj.h">,
- Sched<[WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16]>;
-def FSGNJN_H : FPALUH_rr<0b0010010, 0b001, "fsgnjn.h">,
- Sched<[WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16]>;
-def FSGNJX_H : FPALUH_rr<0b0010010, 0b010, "fsgnjx.h">,
- Sched<[WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16]>;
-def FMIN_H : FPALUH_rr<0b0010110, 0b000, "fmin.h">,
- Sched<[WriteFMinMax16, ReadFMinMax16, ReadFMinMax16]>;
-def FMAX_H : FPALUH_rr<0b0010110, 0b001, "fmax.h">,
- Sched<[WriteFMinMax16, ReadFMinMax16, ReadFMinMax16]>;
+let SchedRW = [WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16],
+ mayRaiseFPException = 0 in {
+def FSGNJ_H : FPALU_rr<0b0010010, 0b000, "fsgnj.h", FPR16>;
+def FSGNJN_H : FPALU_rr<0b0010010, 0b001, "fsgnjn.h", FPR16>;
+def FSGNJX_H : FPALU_rr<0b0010010, 0b010, "fsgnjx.h", FPR16>;
+}
-def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.w.h">,
- Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]> {
- let rs2 = 0b00000;
+let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in {
+def FMIN_H : FPALU_rr<0b0010110, 0b000, "fmin.h", FPR16>;
+def FMAX_H : FPALU_rr<0b0010110, 0b001, "fmax.h", FPR16>;
}
+
+def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, 0b00000, GPR, FPR16, "fcvt.w.h">,
+ Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>;
-def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.wu.h">,
- Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]> {
- let rs2 = 0b00001;
-}
+def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, 0b00001, GPR, FPR16, "fcvt.wu.h">,
+ Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
def : FPUnaryOpDynFrmAlias<FCVT_WU_H, "fcvt.wu.h", GPR, FPR16>;
-def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.w">,
- Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]> {
- let rs2 = 0b00000;
-}
+def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, 0b00000, FPR16, GPR, "fcvt.h.w">,
+ Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_W, "fcvt.h.w", FPR16, GPR>;
-def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">,
- Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]> {
- let rs2 = 0b00001;
-}
+def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">,
+ Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh]
let Predicates = [HasStdExtZfhmin] in {
-def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">,
- Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]> {
- let rs2 = 0b00000;
-}
+def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">,
+ Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>;
-def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b000, FPR32, FPR16, "fcvt.s.h">,
- Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]> {
- let rs2 = 0b00010;
-}
+def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b00010, 0b000, FPR32, FPR16, "fcvt.s.h">,
+ Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>;
-def FMV_X_H : FPUnaryOp_r<0b1110010, 0b000, GPR, FPR16, "fmv.x.h">,
- Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]> {
- let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">,
+ Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]>;
-def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">,
- Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]> {
- let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">,
+ Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>;
} // Predicates = [HasStdExtZfhmin]
let Predicates = [HasStdExtZfh] in {
-def FEQ_H : FPCmpH_rr<0b010, "feq.h">;
-def FLT_H : FPCmpH_rr<0b001, "flt.h">;
-def FLE_H : FPCmpH_rr<0b000, "fle.h">;
-def FCLASS_H : FPUnaryOp_r<0b1110010, 0b001, GPR, FPR16, "fclass.h">,
- Sched<[WriteFClass16, ReadFClass16]> {
- let rs2 = 0b00000;
+let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in {
+def FEQ_H : FPCmp_rr<0b1010010, 0b010, "feq.h", FPR16>;
+def FLT_H : FPCmp_rr<0b1010010, 0b001, "flt.h", FPR16>;
+def FLE_H : FPCmp_rr<0b1010010, 0b000, "fle.h", FPR16>;
}
+
+let mayRaiseFPException = 0 in
+def FCLASS_H : FPUnaryOp_r<0b1110010, 0b00000, 0b001, GPR, FPR16, "fclass.h">,
+ Sched<[WriteFClass16, ReadFClass16]>;
} // Predicates = [HasStdExtZfh]
let Predicates = [HasStdExtZfh, IsRV64] in {
-def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.l.h">,
- Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]> {
- let rs2 = 0b00010;
-}
+def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, 0b00010, GPR, FPR16, "fcvt.l.h">,
+ Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
def : FPUnaryOpDynFrmAlias<FCVT_L_H, "fcvt.l.h", GPR, FPR16>;
-def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.lu.h">,
- Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]> {
- let rs2 = 0b00011;
-}
+def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, 0b00011, GPR, FPR16, "fcvt.lu.h">,
+ Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
def : FPUnaryOpDynFrmAlias<FCVT_LU_H, "fcvt.lu.h", GPR, FPR16>;
-def FCVT_H_L : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.l">,
- Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]> {
- let rs2 = 0b00010;
-}
+def FCVT_H_L : FPUnaryOp_r_frm<0b1101010, 0b00010, FPR16, GPR, "fcvt.h.l">,
+ Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_L, "fcvt.h.l", FPR16, GPR>;
-def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">,
- Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]> {
- let rs2 = 0b00011;
-}
+def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">,
+ Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh, IsRV64]
let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
-def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">,
- Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]> {
- let rs2 = 0b00001;
-}
+def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">,
+ Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
-def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">,
- Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]> {
- let rs2 = 0b00010;
-}
+def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">,
+ Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
} // Predicates = [HasStdExtZfhmin, HasStdExtD]
//===----------------------------------------------------------------------===//
@@ -275,12 +215,12 @@ def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>;
/// Float arithmetic operations
-def : PatFpr16Fpr16DynFrm<fadd, FADD_H>;
-def : PatFpr16Fpr16DynFrm<fsub, FSUB_H>;
-def : PatFpr16Fpr16DynFrm<fmul, FMUL_H>;
-def : PatFpr16Fpr16DynFrm<fdiv, FDIV_H>;
+def : PatFpr16Fpr16DynFrm<any_fadd, FADD_H>;
+def : PatFpr16Fpr16DynFrm<any_fsub, FSUB_H>;
+def : PatFpr16Fpr16DynFrm<any_fmul, FMUL_H>;
+def : PatFpr16Fpr16DynFrm<any_fdiv, FDIV_H>;
-def : Pat<(fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
+def : Pat<(any_fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>;
def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>;
@@ -292,19 +232,19 @@ def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2),
def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
// fmadd: rs1 * rs2 + rs3
-def : Pat<(fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3),
+def : Pat<(any_fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3),
(FMADD_H $rs1, $rs2, $rs3, 0b111)>;
// fmsub: rs1 * rs2 - rs3
-def : Pat<(fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)),
+def : Pat<(any_fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)),
(FMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
// fnmsub: -rs1 * rs2 + rs3
-def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
+def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
(FNMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
// fnmadd: -rs1 * rs2 - rs3
-def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
+def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
(FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
@@ -337,8 +277,8 @@ defm : StPat<store, FSH, FPR16, f16>;
/// Float conversion operations
// f32 -> f16, f16 -> f32
-def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>;
-def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
+def : Pat<(any_fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>;
+def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
// Moves (no conversion)
def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
@@ -347,8 +287,8 @@ def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
let Predicates = [HasStdExtZfh, IsRV32] in {
// half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
// Saturating float->[u]int32.
def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
@@ -361,20 +301,20 @@ def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>;
def : Pat<(i32 (lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
} // Predicates = [HasStdExtZfh, IsRV32]
let Predicates = [HasStdExtZfh, IsRV64] in {
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
// half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
-def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
// Saturating float->[u]int64.
def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
@@ -389,17 +329,17 @@ def : Pat<(i64 (lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
def : Pat<(i64 (llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>;
-def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
} // Predicates = [HasStdExtZfh, IsRV64]
let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
-def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
-def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
+def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
+def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
/// Float arithmetic operations
def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 798532d5bc44..9094dff1dda1 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -105,7 +105,6 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Floating point environment registers.
markSuperRegs(Reserved, RISCV::FRM);
markSuperRegs(Reserved, RISCV::FFLAGS);
- markSuperRegs(Reserved, RISCV::FCSR);
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index a56f992d320e..20903b317180 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -550,16 +550,15 @@ def VRM8NoV0 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
(add V8M8, V16M8, V24M8), 8>;
-defvar VMaskVTs = [vbool64_t, vbool32_t, vbool16_t, vbool8_t,
- vbool4_t, vbool2_t, vbool1_t];
+defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
+ vbool32_t, vbool64_t];
def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
let Size = 64;
}
// The register class is added for inline assembly for vector mask types.
-def VM : VReg<[vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
- vbool32_t, vbool64_t],
+def VM : VReg<VMaskVTs,
(add (sequence "V%u", 8, 31),
(sequence "V%u", 0, 7)), 1>;
@@ -578,7 +577,6 @@ foreach m = LMULList.m in {
// Special registers
def FFLAGS : RISCVReg<0, "fflags">;
def FRM : RISCVReg<0, "frm">;
-def FCSR : RISCVReg<0, "fcsr">;
// Any type register. Used for .insn directives when we don't know what the
// register types could be.
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 14f59152ed42..d5a0932c8778 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -16,7 +16,8 @@ def RocketModel : SchedMachineModel {
let IssueWidth = 1; // 1 micro-op is dispatched per cycle.
let LoadLatency = 3;
let MispredictPenalty = 3;
- let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
+ let CompleteModel = false;
+ let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 5b435fcb16a2..7f9d0aabc4ed 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
let LoadLatency = 3;
let MispredictPenalty = 3;
let CompleteModel = 0;
- let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
+ let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
}
// The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index deb2a11f98f1..d0330e6984a5 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -51,7 +51,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtZbt = false;
bool HasStdExtV = false;
bool HasStdExtZvlsseg = false;
- bool HasStdExtZvamo = false;
bool HasStdExtZfhmin = false;
bool HasStdExtZfh = false;
bool HasRV64 = false;
@@ -118,7 +117,6 @@ public:
bool hasStdExtZbt() const { return HasStdExtZbt; }
bool hasStdExtV() const { return HasStdExtV; }
bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
- bool hasStdExtZvamo() const { return HasStdExtZvamo; }
bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
bool hasStdExtZfh() const { return HasStdExtZfh; }
bool is64Bit() const { return HasRV64; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 56f0952fafc9..c435430a1288 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -162,3 +162,94 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
return NumLoads * MemOpCost;
}
+
+void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ // TODO: More tuning on benchmarks and metrics with changes as needed
+ // would apply to all settings below to enable performance.
+
+ // Support explicit targets enabled for SiFive with the unrolling preferences
+ // below
+ bool UseDefaultPreferences = true;
+ if (ST->getTuneCPU().contains("sifive-e76") ||
+ ST->getTuneCPU().contains("sifive-s76") ||
+ ST->getTuneCPU().contains("sifive-u74") ||
+ ST->getTuneCPU().contains("sifive-7"))
+ UseDefaultPreferences = false;
+
+ if (UseDefaultPreferences)
+ return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
+
+ // Enable Upper bound unrolling universally, not dependant upon the conditions
+ // below.
+ UP.UpperBound = true;
+
+ // Disable loop unrolling for Oz and Os.
+ UP.OptSizeThreshold = 0;
+ UP.PartialOptSizeThreshold = 0;
+ if (L->getHeader()->getParent()->hasOptSize())
+ return;
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ LLVM_DEBUG(dbgs() << "Loop has:\n"
+ << "Blocks: " << L->getNumBlocks() << "\n"
+ << "Exit blocks: " << ExitingBlocks.size() << "\n");
+
+ // Only allow another exit other than the latch. This acts as an early exit
+ // as it mirrors the profitability calculation of the runtime unroller.
+ if (ExitingBlocks.size() > 2)
+ return;
+
+ // Limit the CFG of the loop body for targets with a branch predictor.
+ // Allowing 4 blocks permits if-then-else diamonds in the body.
+ if (L->getNumBlocks() > 4)
+ return;
+
+ // Don't unroll vectorized loops, including the remainder loop
+ if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+ return;
+
+ // Scan the loop: don't unroll loops with calls as this could prevent
+ // inlining.
+ InstructionCost Cost = 0;
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ // Initial setting - Don't unroll loops containing vectorized
+ // instructions.
+ if (I.getType()->isVectorTy())
+ return;
+
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+ if (!isLoweredToCall(F))
+ continue;
+ }
+ return;
+ }
+
+ SmallVector<const Value *> Operands(I.operand_values());
+ Cost +=
+ getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
+
+ UP.Partial = true;
+ UP.Runtime = true;
+ UP.UnrollRemainder = true;
+ UP.UnrollAndJam = true;
+ UP.UnrollAndJamInnerLoopThreshold = 60;
+
+ // Force unrolling small loops can be very useful because of the branch
+ // taken cost of the backedge.
+ if (Cost < 12)
+ UP.Force = true;
+}
+
+void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 675681616d6e..7353496f4684 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -73,6 +73,13 @@ public:
llvm_unreachable("Unsupported register kind");
}
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
unsigned getMinVectorRegisterBitWidth() const {
return ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0;
}
@@ -178,7 +185,9 @@ public:
}
unsigned getMaxInterleaveFactor(unsigned VF) {
- return ST->getMaxInterleaveFactor();
+ // If the loop will not be vectorized, don't interleave the loop.
+ // Let regular unroll to unroll the loop.
+ return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
}
};
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 0f5e0b9672a9..538380263c3c 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -28,25 +28,43 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value,
if (Kind < FirstTargetFixupKind)
return Value;
+ auto checkFixupInRange = [&](int64_t Min, int64_t Max) -> bool {
+ int64_t SVal = int64_t(Value);
+ if (SVal < Min || SVal > Max) {
+ Ctx.reportError(Fixup.getLoc(), "operand out of range (" + Twine(SVal) +
+ " not between " + Twine(Min) +
+ " and " + Twine(Max) + ")");
+ return false;
+ }
+ return true;
+ };
+
+ auto handlePCRelFixupValue = [&](unsigned W) -> uint64_t {
+ if (Value % 2 != 0)
+ Ctx.reportError(Fixup.getLoc(), "Non-even PC relative offset.");
+ if (!checkFixupInRange(minIntN(W) * 2, maxIntN(W) * 2))
+ return 0;
+ return (int64_t)Value / 2;
+ };
+
switch (unsigned(Kind)) {
case SystemZ::FK_390_PC12DBL:
+ return handlePCRelFixupValue(12);
case SystemZ::FK_390_PC16DBL:
+ return handlePCRelFixupValue(16);
case SystemZ::FK_390_PC24DBL:
+ return handlePCRelFixupValue(24);
case SystemZ::FK_390_PC32DBL:
- return (int64_t)Value / 2;
+ return handlePCRelFixupValue(32);
case SystemZ::FK_390_12:
- if (!isUInt<12>(Value)) {
- Ctx.reportError(Fixup.getLoc(), "displacement exceeds uint12");
+ if (!checkFixupInRange(0, maxUIntN(12)))
return 0;
- }
return Value;
case SystemZ::FK_390_20: {
- if (!isInt<20>(Value)) {
- Ctx.reportError(Fixup.getLoc(), "displacement exceeds int20");
+ if (!checkFixupInRange(minIntN(20), maxIntN(20)))
return 0;
- }
// The high byte of a 20 bit displacement value comes first.
uint64_t DLo = Value & 0xfff;
uint64_t DHi = (Value >> 12) & 0xff;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index e280e4aaf3d8..c83796b8579b 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -197,7 +197,8 @@ getDispOpValue(const MCInst &MI, unsigned OpNum,
// All instructions follow the pattern where the first displacement has a
// 2 bytes offset, and the second one 4 bytes.
unsigned ByteOffs = Fixups.size() == 0 ? 2 : 4;
- Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind));
+ Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind,
+ MI.getLoc()));
assert(Fixups.size() <= 2 && "More than two memory operands in MI?");
return 0;
}
@@ -296,6 +297,7 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
unsigned Kind, int64_t Offset,
bool AllowTLS) const {
+ SMLoc Loc = MI.getLoc();
const MCOperand &MO = MI.getOperand(OpNum);
const MCExpr *Expr;
if (MO.isImm())
@@ -311,13 +313,13 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx);
}
}
- Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind));
+ Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind, Loc));
// Output the fixup for the TLS marker if present.
if (AllowTLS && OpNum + 1 < MI.getNumOperands()) {
const MCOperand &MOTLS = MI.getOperand(OpNum + 1);
- Fixups.push_back(MCFixup::create(0, MOTLS.getExpr(),
- (MCFixupKind)SystemZ::FK_390_TLS_CALL));
+ Fixups.push_back(MCFixup::create(
+ 0, MOTLS.getExpr(), (MCFixupKind)SystemZ::FK_390_TLS_CALL, Loc));
}
return 0;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 373023effb4a..a7ea5e1e4bf8 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -166,6 +166,7 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
// any non-leaf function and restored in the epilogue for use by the
// return instruction so it functions exactly like a callee-saved register.
def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
+ (sequence "R%dD", 4, 4),
(sequence "F%dD", 15, 8))>;
def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 2f7cdfcf7bde..99ab4c5455d6 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -818,7 +818,7 @@ bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
}
SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
- : SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
+ : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0,
Align(32), /* StackRealignable */ false),
RegSpillOffsets(-1) {
@@ -990,12 +990,184 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
return true;
}
+bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Restore FPRs in the normal TargetInstrInfo way.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg))
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ if (SystemZ::VR128BitRegClass.contains(Reg))
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ &SystemZ::VR128BitRegClass, TRI);
+ }
+
+ // Restore call-saved GPRs (but not call-clobbered varargs, which at
+ // this point might hold return values).
+ SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
+ if (RestoreGPRs.LowGPR) {
+ assert(isInt<20>(Regs.getStackPointerBias() + RestoreGPRs.GPROffset));
+ if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR)
+ // Build an LG/L instruction.
+ BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR)
+ .addReg(Regs.getStackPointerRegister())
+ .addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)
+ .addReg(0);
+ else {
+ // Build an LMG/LM instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
+
+ // Add the explicit register operands.
+ MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
+ MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);
+
+ // Add the address.
+ MIB.addReg(Regs.getStackPointerRegister());
+ MIB.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset);
+
+ // Do a second scan adding regs as being defined by instruction
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+ }
+
+ return true;
+}
+
void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ MachineInstr *StoreInstr = nullptr;
+ bool HasFP = hasFP(MF);
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+ uint64_t Offset = 0;
+
+ // TODO: Support leaf functions; only add size of save+reserved area when
+ // function is non-leaf.
+ MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize());
+ uint64_t StackSize = MFFrame.getStackSize();
+
+ // FIXME: Implement support for large stack sizes, when the stack extension
+ // routine needs to be called.
+ if (StackSize > 1024 * 1024) {
+ llvm_unreachable("Huge Stack Frame not yet supported on z/OS");
+ }
+
+ if (ZFI->getSpillGPRRegs().LowGPR) {
+ // Skip over the GPR saves.
+ if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
+ const int Operand = 3;
+ // Now we can set the offset for the operation, since now the Stack
+ // has been finalized.
+ Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
+ // Maximum displacement for STMG instruction.
+ if (isInt<20>(Offset - StackSize))
+ Offset -= StackSize;
+ else
+ StoreInstr = &*MBBI;
+ MBBI->getOperand(Operand).setImm(Offset);
+ ++MBBI;
+ } else
+ llvm_unreachable("Couldn't skip over GPR saves");
+ }
+
+ if (StackSize) {
+ MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI;
+ // Allocate StackSize bytes.
+ int64_t Delta = -int64_t(StackSize);
+
+ // In case the STM(G) instruction also stores SP (R4), but the displacement
+ // is too large, the SP register is manipulated first before storing,
+ // resulting in the wrong value stored and retrieved later. In this case, we
+ // need to temporarily save the value of SP, and store it later to memory.
+ if (StoreInstr && HasFP) {
+ // Insert LR r0,r4 before STMG instruction.
+ BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R0D, RegState::Define)
+ .addReg(SystemZ::R4D);
+ // Insert ST r0,xxx(,r4) after STMG instruction.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R0D, RegState::Kill)
+ .addReg(SystemZ::R4D)
+ .addImm(Offset)
+ .addReg(0);
+ }
+
+ emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
+ ZII);
+ }
+
+ if (HasFP) {
+ // Copy the base of the frame to Frame Pointer Register.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR),
+ Regs.getFramePointerRegister())
+ .addReg(Regs.getStackPointerRegister());
+
+ // Mark the FramePtr as live at the beginning of every block except
+ // the entry block. (We'll have marked R8 as live on entry when
+ // saving the GPRs.)
+ for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
+ I->addLiveIn(Regs.getFramePointerRegister());
+ }
+}
void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
+ MachineBasicBlock &MBB) const {
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // Skip the return instruction.
+ assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
+
+ uint64_t StackSize = MFFrame.getStackSize();
+ if (StackSize) {
+ unsigned SPReg = Regs.getStackPointerRegister();
+ if (ZFI->getRestoreGPRRegs().LowGPR != SPReg) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ emitIncrement(MBB, MBBI, DL, SPReg, StackSize, ZII);
+ }
+ }
+}
bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
- return false;
+ return (MF.getFrameInfo().hasVarSizedObjects());
+}
+
+void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // Setup stack frame offset
+ MFFrame.setOffsetAdjustment(Regs.getStackPointerBias());
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index af219da79c32..106b9e8ebe06 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -115,11 +115,20 @@ public:
ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBII,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 71432218068e..24de52850771 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1500,8 +1500,16 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
assert(VA.isMemLoc() && "Argument not register or memory");
// Create the frame index object for this incoming parameter.
- int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), true);
+ // FIXME: Pre-include call frame size in the offset, should not
+ // need to manually add it here.
+ int64_t ArgSPOffset = VA.getLocMemOffset();
+ if (Subtarget.isTargetXPLINK64()) {
+ auto &XPRegs =
+ Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ ArgSPOffset += XPRegs.getCallFrameSize();
+ }
+ int FI =
+ MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
@@ -5714,6 +5722,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(OC);
OPCODE(XC);
OPCODE(CLC);
+ OPCODE(MEMSET_MVC);
OPCODE(STPCPY);
OPCODE(STRCMP);
OPCODE(SEARCH_STRING);
@@ -7860,8 +7869,10 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
return MBB;
}
-MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
- MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
+MachineBasicBlock *
+SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode, bool IsMemset) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -7870,18 +7881,64 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
uint64_t DestDisp = MI.getOperand(1).getImm();
- MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
- uint64_t SrcDisp = MI.getOperand(3).getImm();
- MachineOperand &LengthMO = MI.getOperand(4);
+ MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
+ uint64_t SrcDisp;
+
+ // Fold the displacement Disp if it is out of range.
+ auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
+ if (!isUInt<12>(Disp)) {
+ Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
+ .add(Base).addImm(Disp).addReg(0);
+ Base = MachineOperand::CreateReg(Reg, false);
+ Disp = 0;
+ }
+ };
+
+ if (!IsMemset) {
+ SrcBase = earlyUseOperand(MI.getOperand(2));
+ SrcDisp = MI.getOperand(3).getImm();
+ } else {
+ SrcBase = DestBase;
+ SrcDisp = DestDisp++;
+ foldDisplIfNeeded(DestBase, DestDisp);
+ }
+
+ MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
bool IsImmForm = LengthMO.isImm();
bool IsRegForm = !IsImmForm;
+ // Build and insert one Opcode of Length, with special treatment for memset.
+ auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
+ MachineBasicBlock::iterator InsPos,
+ MachineOperand DBase, uint64_t DDisp,
+ MachineOperand SBase, uint64_t SDisp,
+ unsigned Length) -> void {
+ assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
+ if (IsMemset) {
+ MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
+ if (ByteMO.isImm())
+ BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
+ .add(SBase).addImm(SDisp).add(ByteMO);
+ else
+ BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
+ .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
+ if (--Length == 0)
+ return;
+ }
+ BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
+ .add(DBase).addImm(DDisp).addImm(Length)
+ .add(SBase).addImm(SDisp)
+ .setMemRefs(MI.memoperands());
+ };
+
bool NeedsLoop = false;
uint64_t ImmLength = 0;
- Register LenMinus1Reg = SystemZ::NoRegister;
+ Register LenAdjReg = SystemZ::NoRegister;
if (IsImmForm) {
ImmLength = LengthMO.getImm();
- ImmLength++; // Add back the '1' subtracted originally.
+ ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
if (ImmLength == 0) {
MI.eraseFromParent();
return MBB;
@@ -7905,7 +7962,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
NeedsLoop = true;
} else {
NeedsLoop = true;
- LenMinus1Reg = LengthMO.getReg();
+ LenAdjReg = LengthMO.getReg();
}
// When generating more than one CLC, all but the last will need to
@@ -7923,17 +7980,17 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
ImmLength &= 255;
} else {
BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
- .addReg(LenMinus1Reg)
+ .addReg(LenAdjReg)
.addReg(0)
.addImm(8);
}
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
auto loadZeroAddress = [&]() -> MachineOperand {
Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
return MachineOperand::CreateReg(Reg, false);
};
- bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
DestBase = loadZeroAddress();
if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
@@ -7968,14 +8025,41 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
DoneMBB = SystemZ::emitBlockAfter(NextMBB);
// MBB:
- // # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
+ // # Jump to AllDoneMBB if the adjusted length in LenAdjReg implies a
+ // # total length of zero, or fall thru to StartMBB.
BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
- .addReg(LenMinus1Reg).addImm(-1);
+ .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
.addMBB(AllDoneMBB);
MBB->addSuccessor(AllDoneMBB);
- MBB->addSuccessor(StartMBB);
+ if (!IsMemset)
+ MBB->addSuccessor(StartMBB);
+ else {
+ // MemsetOneCheckMBB:
+ // # Jump to MemsetOneMBB for a memset of length 1, or
+ // # fall thru to StartMBB.
+ MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
+ MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
+ MBB->addSuccessor(MemsetOneCheckMBB);
+ MBB = MemsetOneCheckMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(LenAdjReg).addImm(-1);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
+ .addMBB(MemsetOneMBB);
+ MBB->addSuccessor(MemsetOneMBB, {10, 100});
+ MBB->addSuccessor(StartMBB, {90, 100});
+
+ // MemsetOneMBB:
+ // # Jump back to AllDoneMBB after a single MVI or STC.
+ MBB = MemsetOneMBB;
+ insertMemMemOp(MBB, MBB->end(),
+ MachineOperand::CreateReg(StartDestReg, false), DestDisp,
+ MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
+ 1);
+ BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
+ MBB->addSuccessor(AllDoneMBB);
+ }
// StartMBB:
// # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
@@ -8032,10 +8116,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (Opcode == SystemZ::MVC)
BuildMI(MBB, DL, TII->get(SystemZ::PFD))
.addImm(SystemZ::PFD_WRITE)
- .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
- BuildMI(MBB, DL, TII->get(Opcode))
- .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
- .addReg(ThisSrcReg).addImm(SrcDisp);
+ .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
+ insertMemMemOp(MBB, MBB->end(),
+ MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
+ MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
if (EndMBB) {
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
@@ -8075,7 +8159,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
// # Use EXecute Relative Long for the remainder of the bytes. The target
// instruction of the EXRL will have a length field of 1 since 0 is an
- // illegal value. The number of bytes processed becomes (%LenMinus1Reg &
+ // illegal value. The number of bytes processed becomes (%LenAdjReg &
// 0xff) + 1.
// # Fall through to AllDoneMBB.
Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
@@ -8088,10 +8172,14 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
.addReg(StartSrcReg).addMBB(StartMBB)
.addReg(NextSrcReg).addMBB(NextMBB);
+ if (IsMemset)
+ insertMemMemOp(MBB, MBB->end(),
+ MachineOperand::CreateReg(RemDestReg, false), DestDisp,
+ MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
MachineInstrBuilder EXRL_MIB =
BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
.addImm(Opcode)
- .addReg(LenMinus1Reg)
+ .addReg(LenAdjReg)
.addReg(RemDestReg).addImm(DestDisp)
.addReg(RemSrcReg).addImm(SrcDisp);
MBB->addSuccessor(AllDoneMBB);
@@ -8107,32 +8195,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
while (ImmLength > 0) {
uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
// The previous iteration might have created out-of-range displacements.
- // Apply them using LAY if so.
- if (!isUInt<12>(DestDisp)) {
- Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
- BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .add(DestBase)
- .addImm(DestDisp)
- .addReg(0);
- DestBase = MachineOperand::CreateReg(Reg, false);
- DestDisp = 0;
- }
- if (!isUInt<12>(SrcDisp)) {
- Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
- BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .add(SrcBase)
- .addImm(SrcDisp)
- .addReg(0);
- SrcBase = MachineOperand::CreateReg(Reg, false);
- SrcDisp = 0;
- }
- BuildMI(*MBB, MI, DL, TII->get(Opcode))
- .add(DestBase)
- .addImm(DestDisp)
- .addImm(ThisLength)
- .add(SrcBase)
- .addImm(SrcDisp)
- .setMemRefs(MI.memoperands());
+ // Fold them into the base register with LA/LAY if so.
+ foldDisplIfNeeded(DestBase, DestDisp);
+ foldDisplIfNeeded(SrcBase, SrcDisp);
+ insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
DestDisp += ThisLength;
SrcDisp += ThisLength;
ImmLength -= ThisLength;
@@ -8630,6 +8696,11 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::CLCImm:
case SystemZ::CLCReg:
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
+ case SystemZ::MemsetImmImm:
+ case SystemZ::MemsetImmReg:
+ case SystemZ::MemsetRegImm:
+ case SystemZ::MemsetRegReg:
+ return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
case SystemZ::CLSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::CLST);
case SystemZ::MVSTLoop:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 461f804ca55e..940c0a857ea4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -126,6 +126,9 @@ enum NodeType : unsigned {
// as for MVC.
CLC,
+ // Use MVC to set a block of memory after storing the first byte.
+ MEMSET_MVC,
+
// Use an MVST-based sequence to implement stpcpy().
STPCPY,
@@ -709,7 +712,8 @@ private:
MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB,
- unsigned Opcode) const;
+ unsigned Opcode,
+ bool IsMemset = false) const;
MachineBasicBlock *emitStringWrapper(MachineInstr &MI, MachineBasicBlock *BB,
unsigned Opcode) const;
MachineBasicBlock *emitTransactionBegin(MachineInstr &MI,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index cd60fff1ab11..e513befd0d6f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5256,6 +5256,16 @@ class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
let Constraints = "$R1 = $R1src";
}
+class MemsetPseudo<DAGOperand lenop, DAGOperand byteop>
+ : Pseudo<(outs), (ins bdaddr12only:$dest, lenop:$length, byteop:$B),
+ [(z_memset_mvc bdaddr12only:$dest, lenop:$length, byteop:$B)]> {
+ let Defs = [CC];
+ let mayLoad = 1;
+ let mayStore = 1;
+ let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
//===----------------------------------------------------------------------===//
// Multiclasses that emit both real and pseudo instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index e4760229fd6b..84f1e0fb428c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -510,6 +510,12 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in {
def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>;
}
+// Memset[Length][Byte] pseudos.
+def MemsetImmImm : MemsetPseudo<imm64, imm32zx8trunc>;
+def MemsetImmReg : MemsetPseudo<imm64, GR32>;
+def MemsetRegImm : MemsetPseudo<ADDR64, imm32zx8trunc>;
+def MemsetRegReg : MemsetPseudo<ADDR64, GR32>;
+
// Move right.
let Predicates = [FeatureMiscellaneousExtensions3],
mayLoad = 1, mayStore = 1, Uses = [R0L] in
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 927d97233286..9935416559bc 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -102,6 +102,10 @@ def SDT_ZMemMemLengthCC : SDTypeProfile<1, 3,
SDTCisPtrTy<1>,
SDTCisPtrTy<2>,
SDTCisVT<3, i64>]>;
+def SDT_ZMemsetMVC : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i32>]>;
def SDT_ZString : SDTypeProfile<1, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
@@ -413,6 +417,8 @@ def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLengthCC,
[SDNPHasChain, SDNPMayLoad]>;
+def z_memset_mvc : SDNode<"SystemZISD::MEMSET_MVC", SDT_ZMemsetMVC,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZStringCC,
[SDNPHasChain, SDNPMayLoad]>;
def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index f38e93109967..db4b4879b33a 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -17,29 +17,44 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-static SDVTList getMemMemVTs(unsigned Op, SelectionDAG &DAG) {
- return Op == SystemZISD::CLC ? DAG.getVTList(MVT::i32, MVT::Other)
- : DAG.getVTList(MVT::Other);
+static unsigned getMemMemLenAdj(unsigned Op) {
+ return Op == SystemZISD::MEMSET_MVC ? 2 : 1;
}
-// Emit a mem-mem operation after subtracting one from size, which will be
-// added back during pseudo expansion. As the Reg case emitted here may be
-// converted by DAGCombiner into having an Imm length, they are both emitted
-// the same way.
+static SDValue createMemMemNode(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue LenAdj, SDValue Byte) {
+ SDVTList VTs = Op == SystemZISD::CLC ? DAG.getVTList(MVT::i32, MVT::Other)
+ : DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 6> Ops;
+ if (Op == SystemZISD::MEMSET_MVC)
+ Ops = { Chain, Dst, LenAdj, Byte };
+ else
+ Ops = { Chain, Dst, Src, LenAdj };
+ return DAG.getNode(Op, DL, VTs, Ops);
+}
+
+// Emit a mem-mem operation after subtracting one (or two for memset) from
+// size, which will be added back during pseudo expansion. As the Reg case
+// emitted here may be converted by DAGCombiner into having an Imm length,
+// they are both emitted the same way.
static SDValue emitMemMemImm(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size) {
- return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src,
- DAG.getConstant(Size - 1, DL, Src.getValueType()));
+ uint64_t Size, SDValue Byte = SDValue()) {
+ unsigned Adj = getMemMemLenAdj(Op);
+ assert(Size >= Adj && "Adjusted length overflow.");
+ SDValue LenAdj = DAG.getConstant(Size - Adj, DL, Dst.getValueType());
+ return createMemMemNode(DAG, DL, Op, Chain, Dst, Src, LenAdj, Byte);
}
static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size) {
- SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
- DAG.getZExtOrTrunc(Size, DL, MVT::i64),
- DAG.getConstant(-1, DL, MVT::i64));
- return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src, LenMinus1);
+ SDValue Size, SDValue Byte = SDValue()) {
+ int64_t Adj = getMemMemLenAdj(Op);
+ SDValue LenAdj = DAG.getNode(ISD::ADD, DL, MVT::i64,
+ DAG.getZExtOrTrunc(Size, DL, MVT::i64),
+ DAG.getConstant(0 - Adj, DL, MVT::i64));
+ return createMemMemNode(DAG, DL, Op, Chain, Dst, Src, LenAdj, Byte);
}
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
@@ -127,13 +142,8 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
if (CByte && CByte->getZExtValue() == 0)
return emitMemMemImm(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Bytes);
- // Copy the byte to the first location and then use MVC to copy
- // it to the rest.
- Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
- SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
- DAG.getConstant(1, DL, PtrVT));
- return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, DstPlus1, Dst,
- Bytes - 1);
+ return emitMemMemImm(DAG, DL, SystemZISD::MEMSET_MVC, Chain, Dst, SDValue(),
+ Bytes, DAG.getAnyExtOrTrunc(Byte, DL, MVT::i32));
}
// Variable length
@@ -141,7 +151,8 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
// Handle the special case of a variable length memset of 0 with XC.
return emitMemMemReg(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Size);
- return SDValue();
+ return emitMemMemReg(DAG, DL, SystemZISD::MEMSET_MVC, Chain, Dst, SDValue(),
+ Size, DAG.getAnyExtOrTrunc(Byte, DL, MVT::i32));
}
// Convert the current CC value into an integer that is 0 if CC == 0,
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index 7e92e4b33812..fd9dc32b04f5 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -84,6 +84,8 @@ class VEAsmParser : public MCTargetAsmParser {
StringRef splitMnemonic(StringRef Name, SMLoc NameLoc,
OperandVector *Operands);
+ bool parseLiteralValues(unsigned Size, SMLoc L);
+
public:
VEAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
@@ -994,10 +996,43 @@ bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
bool VEAsmParser::ParseDirective(AsmToken DirectiveID) {
+ std::string IDVal = DirectiveID.getIdentifier().lower();
+
+ // Defines VE specific directives. Reference is "Vector Engine Assembly
+ // Language Reference Manual":
+ // https://www.hpc.nec/documents/sdk/pdfs/VectorEngine-as-manual-v1.3.pdf
+
+ // The .word is 4 bytes long on VE.
+ if (IDVal == ".word")
+ return parseLiteralValues(4, DirectiveID.getLoc());
+
+ // The .long is 8 bytes long on VE.
+ if (IDVal == ".long")
+ return parseLiteralValues(8, DirectiveID.getLoc());
+
+ // The .llong is 8 bytes long on VE.
+ if (IDVal == ".llong")
+ return parseLiteralValues(8, DirectiveID.getLoc());
+
// Let the MC layer to handle other directives.
return true;
}
+/// parseLiteralValues
+/// ::= .word expression [, expression]*
+/// ::= .long expression [, expression]*
+/// ::= .llong expression [, expression]*
+bool VEAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
+ auto parseOne = [&]() -> bool {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().emitValue(Value, Size, L);
+ return false;
+ };
+ return (parseMany(parseOne));
+}
+
/// Extract \code @lo32/@hi32/etc \endcode modifier from expression.
/// Recursively scan the expression and check for VK_VE_HI32/LO32/etc
/// symbol variants. If all symbols with modifier use the same
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
index 29c209934680..38d163b37080 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
@@ -42,6 +42,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
case VE::fixup_ve_tpoff_hi32:
return (Value >> 32) & 0xffffffff;
case VE::fixup_ve_reflong:
+ case VE::fixup_ve_srel32:
case VE::fixup_ve_lo32:
case VE::fixup_ve_pc_lo32:
case VE::fixup_ve_got_lo32:
@@ -68,6 +69,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case FK_Data_4:
case FK_PCRel_4:
case VE::fixup_ve_reflong:
+ case VE::fixup_ve_srel32:
case VE::fixup_ve_hi32:
case VE::fixup_ve_lo32:
case VE::fixup_ve_pc_hi32:
@@ -103,6 +105,7 @@ public:
const static MCFixupKindInfo Infos[VE::NumTargetFixupKinds] = {
// name, offset, bits, flags
{"fixup_ve_reflong", 0, 32, 0},
+ {"fixup_ve_srel32", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_ve_hi32", 0, 32, 0},
{"fixup_ve_lo32", 0, 32, 0},
{"fixup_ve_pc_hi32", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
index 741e8320a941..ae065407409a 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "VEFixupKinds.h"
#include "VEMCExpr.h"
#include "VEMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -46,16 +47,29 @@ unsigned VEELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
if (IsPCRel) {
switch (Fixup.getTargetKind()) {
default:
- llvm_unreachable("Unimplemented fixup -> relocation");
+ Ctx.reportError(Fixup.getLoc(), "Unsupported pc-relative fixup kind");
+ return ELF::R_VE_NONE;
+ case FK_Data_1:
case FK_PCRel_1:
- llvm_unreachable("Unimplemented fixup fk_data_1 -> relocation");
+ Ctx.reportError(Fixup.getLoc(),
+ "1-byte pc-relative data relocation is not supported");
+ return ELF::R_VE_NONE;
+ case FK_Data_2:
case FK_PCRel_2:
- llvm_unreachable("Unimplemented fixup fk_data_2 -> relocation");
- // FIXME: relative kind?
+ Ctx.reportError(Fixup.getLoc(),
+ "2-byte pc-relative data relocation is not supported");
+ return ELF::R_VE_NONE;
+ case FK_Data_4:
case FK_PCRel_4:
- return ELF::R_VE_REFLONG;
+ return ELF::R_VE_SREL32;
+ case FK_Data_8:
case FK_PCRel_8:
- return ELF::R_VE_REFQUAD;
+ Ctx.reportError(Fixup.getLoc(),
+ "8-byte pc-relative data relocation is not supported");
+ return ELF::R_VE_NONE;
+ case VE::fixup_ve_reflong:
+ case VE::fixup_ve_srel32:
+ return ELF::R_VE_SREL32;
case VE::fixup_ve_pc_hi32:
return ELF::R_VE_PC_HI32;
case VE::fixup_ve_pc_lo32:
@@ -65,25 +79,36 @@ unsigned VEELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
switch (Fixup.getTargetKind()) {
default:
- llvm_unreachable("Unimplemented fixup -> relocation");
+ Ctx.reportError(Fixup.getLoc(), "Unknown ELF relocation type");
+ return ELF::R_VE_NONE;
case FK_Data_1:
- llvm_unreachable("Unimplemented fixup fk_data_1 -> relocation");
+ Ctx.reportError(Fixup.getLoc(), "1-byte data relocation is not supported");
+ return ELF::R_VE_NONE;
case FK_Data_2:
- llvm_unreachable("Unimplemented fixup fk_data_2 -> relocation");
+ Ctx.reportError(Fixup.getLoc(), "2-byte data relocation is not supported");
+ return ELF::R_VE_NONE;
case FK_Data_4:
return ELF::R_VE_REFLONG;
case FK_Data_8:
return ELF::R_VE_REFQUAD;
case VE::fixup_ve_reflong:
return ELF::R_VE_REFLONG;
+ case VE::fixup_ve_srel32:
+ Ctx.reportError(Fixup.getLoc(),
+ "A non pc-relative srel32 relocation is not supported");
+ return ELF::R_VE_NONE;
case VE::fixup_ve_hi32:
return ELF::R_VE_HI32;
case VE::fixup_ve_lo32:
return ELF::R_VE_LO32;
case VE::fixup_ve_pc_hi32:
- llvm_unreachable("Unimplemented fixup pc_hi32 -> relocation");
+ Ctx.reportError(Fixup.getLoc(),
+ "A non pc-relative pc_hi32 relocation is not supported");
+ return ELF::R_VE_NONE;
case VE::fixup_ve_pc_lo32:
- llvm_unreachable("Unimplemented fixup pc_lo32 -> relocation");
+ Ctx.reportError(Fixup.getLoc(),
+ "A non pc-relative pc_lo32 relocation is not supported");
+ return ELF::R_VE_NONE;
case VE::fixup_ve_got_hi32:
return ELF::R_VE_GOT_HI32;
case VE::fixup_ve_got_lo32:
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
index 5d5dc1c5c891..46b995cee840 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
@@ -17,6 +17,9 @@ enum Fixups {
/// fixup_ve_reflong - 32-bit fixup corresponding to foo
fixup_ve_reflong = FirstTargetFixupKind,
+ /// fixup_ve_srel32 - 32-bit pc-relative fixup corresponding to foo as a
+ /// branch target
+ fixup_ve_srel32,
+
/// fixup_ve_hi32 - 32-bit fixup corresponding to foo@hi
fixup_ve_hi32,
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
index d50d8fcae9da..65bb0cf8b0d7 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
@@ -102,11 +102,11 @@ unsigned VEMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCSubtargetInfo &STI) const {
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
-
if (MO.isImm())
- return MO.getImm();
+ return static_cast<unsigned>(MO.getImm());
assert(MO.isExpr());
+
const MCExpr *Expr = MO.getExpr();
if (const VEMCExpr *SExpr = dyn_cast<VEMCExpr>(Expr)) {
MCFixupKind Kind = (MCFixupKind)SExpr->getFixupKind();
@@ -131,7 +131,7 @@ VEMCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
return getMachineOpValue(MI, MO, Fixups, STI);
Fixups.push_back(
- MCFixup::create(0, MO.getExpr(), (MCFixupKind)VE::fixup_ve_pc_lo32));
+ MCFixup::create(0, MO.getExpr(), (MCFixupKind)VE::fixup_ve_srel32));
return 0;
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
index a3ce3b3309be..4d45918ad0aa 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
@@ -12,11 +12,12 @@
//===----------------------------------------------------------------------===//
#include "VEMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCValue.h"
using namespace llvm;
@@ -174,7 +175,13 @@ VE::Fixups VEMCExpr::getFixupKind(VEMCExpr::VariantKind Kind) {
bool VEMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
- return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
+ if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
+ return false;
+
+ Res =
+ MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
+
+ return true;
}
static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 32315543826a..5ef223d6030b 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1720,7 +1720,7 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
-#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#define ADD_BINARY_VVP_OP(VVP_NAME, VP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
return lowerToVVP(Op, DAG);
}
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index 2c88d5099a7b..99566e91ec11 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -29,6 +29,16 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
IsVLVT<4>
]>;
+// BinaryFPOp(x,y,mask,vl)
+def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisFP<0>,
+ SDTCisInt<3>,
+ SDTCisSameNumEltsAs<0, 3>,
+ IsVLVT<4>
+]>;
+
// Binary operator commutative pattern.
class vvp_commutative<SDNode RootOp> :
PatFrags<
@@ -40,7 +50,32 @@ class vvp_commutative<SDNode RootOp> :
def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
def c_vvp_add : vvp_commutative<vvp_add>;
+def vvp_sub : SDNode<"VEISD::VVP_SUB", SDTIntBinOpVVP>;
+
+def vvp_mul : SDNode<"VEISD::VVP_MUL", SDTIntBinOpVVP>;
+def c_vvp_mul : vvp_commutative<vvp_mul>;
+
+def vvp_sdiv : SDNode<"VEISD::VVP_SDIV", SDTIntBinOpVVP>;
+def vvp_udiv : SDNode<"VEISD::VVP_UDIV", SDTIntBinOpVVP>;
+
def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>;
def c_vvp_and : vvp_commutative<vvp_and>;
+def vvp_or : SDNode<"VEISD::VVP_OR", SDTIntBinOpVVP>;
+def c_vvp_or : vvp_commutative<vvp_or>;
+
+def vvp_xor : SDNode<"VEISD::VVP_XOR", SDTIntBinOpVVP>;
+def c_vvp_xor : vvp_commutative<vvp_xor>;
+
+def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>;
+def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
+def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
+
+def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
+def c_vvp_fadd : vvp_commutative<vvp_fadd>;
+def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
+def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
+def c_vvp_fmul : vvp_commutative<vvp_fmul>;
+def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
+
// } Binary Operators
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index ac03e0bf627e..8d5d9d103547 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -17,54 +17,177 @@
//===----------------------------------------------------------------------===//
include "VVPInstrInfo.td"
-multiclass VectorBinaryArith<
- SDPatternOperator OpNode,
- ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
- string OpBaseName> {
- // No mask.
+multiclass Binary_rv<SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with select, broadcast.
+ // TODO
+
+ // Unmasked, broadcast.
def : Pat<(OpNode
- (any_broadcast ScalarVT:$sx),
- DataVT:$vy, (MaskVT true_mask), i32:$avl),
+ (any_broadcast ScalarVT:$sx), DataVT:$vy,
+ (MaskVT true_mask),
+ i32:$avl),
(!cast<Instruction>(OpBaseName#"rvl")
ScalarVT:$sx, $vy, $avl)>;
- def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl),
+ // Masked, broadcast.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx), DataVT:$vy,
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvml")
+ ScalarVT:$sx, $vy, $mask, $avl)>;
+}
+
+multiclass Binary_vr<SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with select, broadcast.
+ // TODO
+
+ // Unmasked, broadcast.
+ def : Pat<(OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy),
+ (MaskVT true_mask),
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrl")
+ $vx, ScalarVT:$sy, $avl)>;
+ // Masked, broadcast.
+ def : Pat<(OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy),
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrml")
+ $vx, ScalarVT:$sy, $mask, $avl)>;
+}
+
+multiclass Binary_vv<SDPatternOperator OpNode,
+ ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with select.
+ // TODO
+
+ // Unmasked.
+ def : Pat<(OpNode
+ DataVT:$vx, DataVT:$vy,
+ (MaskVT true_mask),
+ i32:$avl),
(!cast<Instruction>(OpBaseName#"vvl")
$vx, $vy, $avl)>;
- // Mask.
+ // Masked.
def : Pat<(OpNode
- (any_broadcast ScalarVT:$sx),
- DataVT:$vy, MaskVT:$mask, i32:$avl),
- (!cast<Instruction>(OpBaseName#"rvml")
- ScalarVT:$sx, $vy, $mask, $avl)>;
- def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl),
+ DataVT:$vx, DataVT:$vy,
+ MaskVT:$mask,
+ i32:$avl),
(!cast<Instruction>(OpBaseName#"vvml")
$vx, $vy, $mask, $avl)>;
+}
- // TODO We do not specify patterns for the immediate variants here. There
- // will be an immediate folding pass that takes care of switching to the
- // immediate variant where applicable.
+multiclass Binary_rv_vv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+ string OpBaseName> {
+ defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+ defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
+}
+
+multiclass Binary_vr_vv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+ string OpBaseName> {
+ defm : Binary_vr<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+ defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
+}
- // TODO Fold vvp_select into passthru.
+multiclass Binary_rv_vr_vv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+ string OpBaseName> {
+ defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+ defm : Binary_vr_vv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
}
// Expand both 64bit and 32 bit variant (256 elements)
-multiclass VectorBinaryArith_ShortLong<
+multiclass Binary_rv_vv_ShortLong<
+ SDPatternOperator OpNode,
+ ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+ ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+ defm : Binary_rv_vv<OpNode,
+ LongScalarVT, LongDataVT, v256i1,
+ LongOpBaseName>;
+ defm : Binary_rv_vv<OpNode,
+ ShortScalarVT, ShortDataVT, v256i1,
+ ShortOpBaseName>;
+}
+
+multiclass Binary_vr_vv_ShortLong<
+ SDPatternOperator OpNode,
+ ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+ ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+ defm : Binary_vr_vv<OpNode,
+ LongScalarVT, LongDataVT, v256i1,
+ LongOpBaseName>;
+ defm : Binary_vr_vv<OpNode,
+ ShortScalarVT, ShortDataVT, v256i1,
+ ShortOpBaseName>;
+}
+
+multiclass Binary_rv_vr_vv_ShortLong<
SDPatternOperator OpNode,
ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
- defm : VectorBinaryArith<OpNode,
- LongScalarVT, LongDataVT, v256i1,
- LongOpBaseName>;
- defm : VectorBinaryArith<OpNode,
- ShortScalarVT, ShortDataVT, v256i1,
- ShortOpBaseName>;
+ defm : Binary_rv_vr_vv<OpNode,
+ LongScalarVT, LongDataVT, v256i1,
+ LongOpBaseName>;
+ defm : Binary_rv_vr_vv<OpNode,
+ ShortScalarVT, ShortDataVT, v256i1,
+ ShortOpBaseName>;
}
+defm : Binary_rv_vv_ShortLong<c_vvp_add,
+ i64, v256i64, "VADDSL",
+ i32, v256i32, "VADDSWSX">;
+defm : Binary_rv_vv_ShortLong<vvp_sub,
+ i64, v256i64, "VSUBSL",
+ i32, v256i32, "VSUBSWSX">;
+defm : Binary_rv_vv_ShortLong<c_vvp_mul,
+ i64, v256i64, "VMULSL",
+ i32, v256i32, "VMULSWSX">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_sdiv,
+ i64, v256i64, "VDIVSL",
+ i32, v256i32, "VDIVSWSX">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_udiv,
+ i64, v256i64, "VDIVUL",
+ i32, v256i32, "VDIVUW">;
+defm : Binary_rv_vv_ShortLong<c_vvp_and,
+ i64, v256i64, "VAND",
+ i32, v256i32, "PVANDLO">;
+defm : Binary_rv_vv_ShortLong<c_vvp_or,
+ i64, v256i64, "VOR",
+ i32, v256i32, "PVORLO">;
+defm : Binary_rv_vv_ShortLong<c_vvp_xor,
+ i64, v256i64, "VXOR",
+ i32, v256i32, "PVXORLO">;
+defm : Binary_vr_vv_ShortLong<vvp_shl,
+ i64, v256i64, "VSLL",
+ i32, v256i32, "PVSLLLO">;
+defm : Binary_vr_vv_ShortLong<vvp_sra,
+ i64, v256i64, "VSRAL",
+ i32, v256i32, "PVSRALO">;
+defm : Binary_vr_vv_ShortLong<vvp_srl,
+ i64, v256i64, "VSRL",
+ i32, v256i32, "PVSRLLO">;
-defm : VectorBinaryArith_ShortLong<c_vvp_add,
- i64, v256i64, "VADDSL",
- i32, v256i32, "VADDSWSX">;
-defm : VectorBinaryArith_ShortLong<c_vvp_and,
- i64, v256i64, "VAND",
- i32, v256i32, "PVANDLO">;
+defm : Binary_rv_vv_ShortLong<c_vvp_fadd,
+ f64, v256f64, "VFADDD",
+ f32, v256f32, "PVFADDUP">;
+defm : Binary_rv_vv_ShortLong<c_vvp_fmul,
+ f64, v256f64, "VFMULD",
+ f32, v256f32, "PVFMULUP">;
+defm : Binary_rv_vv_ShortLong<vvp_fsub,
+ f64, v256f64, "VFSUBD",
+ f32, v256f32, "PVFSUBUP">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
+ f64, v256f64, "VFDIVD",
+ f32, v256f32, "VFDIVS">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index a68402e9ea10..8a9231f7d3e6 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -28,14 +28,38 @@
/// \p VVPName is a VVP Binary operator.
/// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
#ifndef ADD_BINARY_VVP_OP
-#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X)
+#define ADD_BINARY_VVP_OP(VVPNAME,VPNAME,SDNAME) \
+ ADD_VVP_OP(VVPNAME,SDNAME) \
+ HANDLE_VP_TO_VVP(VPNAME, VVPNAME)
+#endif
+
+#ifndef ADD_BINARY_VVP_OP_COMPACT
+#define ADD_BINARY_VVP_OP_COMPACT(NAME) \
+ ADD_BINARY_VVP_OP(VVP_##NAME,VP_##NAME,NAME)
#endif
// Integer arithmetic.
-ADD_BINARY_VVP_OP(VVP_ADD,ADD)
+ADD_BINARY_VVP_OP_COMPACT(ADD)
+ADD_BINARY_VVP_OP_COMPACT(SUB)
+ADD_BINARY_VVP_OP_COMPACT(MUL)
+ADD_BINARY_VVP_OP_COMPACT(UDIV)
+ADD_BINARY_VVP_OP_COMPACT(SDIV)
-ADD_BINARY_VVP_OP(VVP_AND,AND)
+ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA)
+ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL)
+ADD_BINARY_VVP_OP_COMPACT(SHL)
+
+ADD_BINARY_VVP_OP_COMPACT(AND)
+ADD_BINARY_VVP_OP_COMPACT(OR)
+ADD_BINARY_VVP_OP_COMPACT(XOR)
+
+// FP arithmetic.
+ADD_BINARY_VVP_OP_COMPACT(FADD)
+ADD_BINARY_VVP_OP_COMPACT(FSUB)
+ADD_BINARY_VVP_OP_COMPACT(FMUL)
+ADD_BINARY_VVP_OP_COMPACT(FDIV)
-#undef HANDLE_VP_TO_VVP
#undef ADD_BINARY_VVP_OP
+#undef ADD_BINARY_VVP_OP_COMPACT
#undef ADD_VVP_OP
+#undef HANDLE_VP_TO_VVP
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 7d1e6c553f81..56689d3ee06b 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -571,7 +571,6 @@ public:
// proper nesting.
bool ExpectBlockType = false;
bool ExpectFuncType = false;
- bool ExpectHeapType = false;
std::unique_ptr<WebAssemblyOperand> FunctionTable;
if (Name == "block") {
push(Block);
@@ -624,8 +623,6 @@ public:
if (parseFunctionTableOperand(&FunctionTable))
return true;
ExpectFuncType = true;
- } else if (Name == "ref.null") {
- ExpectHeapType = true;
}
if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
@@ -670,15 +667,6 @@ public:
return error("Unknown block type: ", Id);
addBlockTypeOperand(Operands, NameLoc, BT);
Parser.Lex();
- } else if (ExpectHeapType) {
- auto HeapType = WebAssembly::parseHeapType(Id.getString());
- if (HeapType == WebAssembly::HeapType::Invalid) {
- return error("Expected a heap type: ", Id);
- }
- Operands.push_back(std::make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, Id.getLoc(), Id.getEndLoc(),
- WebAssemblyOperand::IntOp{static_cast<int64_t>(HeapType)}));
- Parser.Lex();
} else {
// Assume this identifier is a label.
const MCExpr *Val;
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index a6b5d4252f2f..128ce5c4fec0 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -112,9 +112,18 @@ bool WebAssemblyAsmTypeCheck::getLocal(SMLoc ErrorLoc, const MCInst &Inst,
return false;
}
-bool WebAssemblyAsmTypeCheck::checkEnd(SMLoc ErrorLoc) {
+bool WebAssemblyAsmTypeCheck::checkEnd(SMLoc ErrorLoc, bool PopVals) {
if (LastSig.Returns.size() > Stack.size())
return typeError(ErrorLoc, "end: insufficient values on the type stack");
+
+ if (PopVals) {
+ for (auto VT : llvm::reverse(LastSig.Returns)) {
+ if (popType(ErrorLoc, VT))
+ return true;
+ }
+ return false;
+ }
+
for (size_t i = 0; i < LastSig.Returns.size(); i++) {
auto EVT = LastSig.Returns[i];
auto PVT = Stack[Stack.size() - LastSig.Returns.size() + i];
@@ -221,7 +230,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
return true;
} else if (Name == "end_block" || Name == "end_loop" || Name == "end_if" ||
Name == "else" || Name == "end_try") {
- if (checkEnd(ErrorLoc))
+ if (checkEnd(ErrorLoc, Name == "else"))
return true;
if (Name == "end_block")
Unreachable = false;
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
index aa35213ccca3..2b07faf67a18 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
@@ -39,7 +39,7 @@ class WebAssemblyAsmTypeCheck final {
bool typeError(SMLoc ErrorLoc, const Twine &Msg);
bool popType(SMLoc ErrorLoc, Optional<wasm::ValType> EVT);
bool getLocal(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type);
- bool checkEnd(SMLoc ErrorLoc);
+ bool checkEnd(SMLoc ErrorLoc, bool PopVals = false);
bool checkSig(SMLoc ErrorLoc, const wasm::WasmSignature &Sig);
bool getSymRef(SMLoc ErrorLoc, const MCInst &Inst,
const MCSymbolRefExpr *&SymRef);
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 2e1e4f061219..5d38145559da 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -241,28 +241,6 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
break;
}
- // heap_type operands, for e.g. ref.null:
- case WebAssembly::OPERAND_HEAPTYPE: {
- int64_t Val;
- uint64_t PrevSize = Size;
- if (!nextLEB(Val, Bytes, Size, true))
- return MCDisassembler::Fail;
- if (Val < 0 && Size == PrevSize + 1) {
- // The HeapType encoding is like BlockType, in that encodings that
- // decode as negative values indicate ValTypes. In practice we expect
- // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here.
- //
- // The positive SLEB values are reserved for future expansion and are
- // expected to be type indices in the typed function references
- // proposal, and should disassemble as MCSymbolRefExpr as in BlockType
- // above.
- MI.addOperand(MCOperand::createImm(Val & 0x7f));
- } else {
- MI.addOperand(
- MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid)));
- }
- break;
- }
// FP operands.
case WebAssembly::OPERAND_F32IMM: {
if (!parseImmediate<float>(MI, Size, Bytes))
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index 2967aaa00ad4..d72bfdbbfb99 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -366,26 +366,3 @@ void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
}
}
}
-
-void WebAssemblyInstPrinter::printWebAssemblyHeapTypeOperand(const MCInst *MI,
- unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm()) {
- switch (Op.getImm()) {
- case long(wasm::ValType::EXTERNREF):
- O << "extern";
- break;
- case long(wasm::ValType::FUNCREF):
- O << "func";
- break;
- default:
- O << "unsupported_heap_type_value";
- break;
- }
- } else {
- // Typed function references and other subtypes of funcref and externref
- // currently unimplemented.
- O << "unsupported_heap_type_operand";
- }
-}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index 7d980c78c3c9..fe104cbca12e 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -47,8 +47,6 @@ public:
raw_ostream &O);
void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O);
- void printWebAssemblyHeapTypeOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O);
// Autogenerated by tblgen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index c3d259e6ff20..d8122950e061 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyMCAsmInfo.h"
+#include "Utils/WebAssemblyUtilities.h"
#include "llvm/ADT/Triple.h"
using namespace llvm;
@@ -44,5 +45,13 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T,
SupportsDebugInformation = true;
+ // When compilation is done on a cpp file by clang, the exception model info
+ // is stored in LangOptions, which is later used to set the info in
+ // TargetOptions and then MCAsmInfo in LLVMTargetMachine::initAsmInfo(). But
+ // this process does not happen when compiling bitcode directly with clang, so
+ // we make sure this info is set correctly.
+ if (WebAssembly::WasmEnableEH || WebAssembly::WasmEnableSjLj)
+ ExceptionsType = ExceptionHandling::Wasm;
+
// TODO: UseIntegratedAssembler?
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 4961c2ef9529..6e494b9430f7 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -106,9 +106,6 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
encodeSLEB128(int64_t(MO.getImm()), OS);
break;
case WebAssembly::OPERAND_SIGNATURE:
- case WebAssembly::OPERAND_HEAPTYPE:
- OS << uint8_t(MO.getImm());
- break;
case WebAssembly::OPERAND_VEC_I8IMM:
support::endian::write<uint8_t>(OS, MO.getImm(), support::little);
break;
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index d07bfce9abc1..b2f10ca93a4f 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -78,8 +78,6 @@ enum OperandType {
OPERAND_BRLIST,
/// 32-bit unsigned table number.
OPERAND_TABLE,
- /// heap type immediate for ref.null.
- OPERAND_HEAPTYPE,
};
} // end namespace WebAssembly
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
index 6f81431bba2d..0412e524f800 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
@@ -41,13 +41,6 @@ Optional<wasm::ValType> WebAssembly::parseType(StringRef Type) {
return Optional<wasm::ValType>();
}
-WebAssembly::HeapType WebAssembly::parseHeapType(StringRef Type) {
- return StringSwitch<WebAssembly::HeapType>(Type)
- .Case("extern", WebAssembly::HeapType::Externref)
- .Case("func", WebAssembly::HeapType::Funcref)
- .Default(WebAssembly::HeapType::Invalid);
-}
-
WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) {
// Multivalue block types are handled separately in parseSignature
return StringSwitch<WebAssembly::BlockType>(Type)
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
index 8d757df27b34..042d51c7d6cb 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
@@ -41,17 +41,9 @@ enum class BlockType : unsigned {
Multivalue = 0xffff,
};
-/// Used as immediate MachineOperands for heap types, e.g. for ref.null.
-enum class HeapType : unsigned {
- Invalid = 0x00,
- Externref = unsigned(wasm::ValType::EXTERNREF),
- Funcref = unsigned(wasm::ValType::FUNCREF),
-};
-
// Convert StringRef to ValType / HealType / BlockType
Optional<wasm::ValType> parseType(StringRef Type);
-HeapType parseHeapType(StringRef Type);
BlockType parseBlockType(StringRef Type);
MVT parseMVT(StringRef Type);
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
index 3da80f4fc875..b87c884c9e4a 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
@@ -18,6 +18,31 @@
#include "llvm/MC/MCContext.h"
using namespace llvm;
+// Exception handling & setjmp-longjmp handling related options. These are
+// defined here to be shared between WebAssembly and its subdirectories.
+
+// Emscripten's asm.js-style exception handling
+cl::opt<bool> WebAssembly::WasmEnableEmEH(
+ "enable-emscripten-cxx-exceptions",
+ cl::desc("WebAssembly Emscripten-style exception handling"),
+ cl::init(false));
+// Emscripten's asm.js-style setjmp/longjmp handling
+cl::opt<bool> WebAssembly::WasmEnableEmSjLj(
+ "enable-emscripten-sjlj",
+ cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
+ cl::init(false));
+// Exception handling using wasm EH instructions
+cl::opt<bool>
+ WebAssembly::WasmEnableEH("wasm-enable-eh",
+ cl::desc("WebAssembly exception handling"),
+ cl::init(false));
+// setjmp/longjmp handling using wasm EH instrutions
+cl::opt<bool>
+ WebAssembly::WasmEnableSjLj("wasm-enable-sjlj",
+ cl::desc("WebAssembly setjmp/longjmp handling"),
+ cl::init(false));
+
+// Function names in libc++abi and libunwind
const char *const WebAssembly::CxaBeginCatchFn = "__cxa_begin_catch";
const char *const WebAssembly::CxaRethrowFn = "__cxa_rethrow";
const char *const WebAssembly::StdTerminateFn = "_ZSt9terminatev";
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index f6e96d9b2877..d024185defb4 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
@@ -70,6 +71,12 @@ inline bool isRefType(const Type *Ty) {
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
bool mayThrow(const MachineInstr &MI);
+// Exception handling / setjmp-longjmp handling command-line options
+extern cl::opt<bool> WasmEnableEmEH; // asm.js-style EH
+extern cl::opt<bool> WasmEnableEmSjLj; // asm.js-style SjLJ
+extern cl::opt<bool> WasmEnableEH; // EH using Wasm EH instructions
+extern cl::opt<bool> WasmEnableSjLj; // SjLj using Wasm EH instructions
+
// Exception-related function names
extern const char *const ClangCallTerminateFn;
extern const char *const CxaBeginCatchFn;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 0d3f51693261..e3af6b2662ef 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -51,8 +51,6 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
extern cl::opt<bool> WasmKeepRegisters;
-extern cl::opt<bool> WasmEnableEmEH;
-extern cl::opt<bool> WasmEnableEmSjLj;
//===----------------------------------------------------------------------===//
// Helpers.
@@ -196,6 +194,13 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), Mutable});
}
+ // If the GlobalVariable refers to a table, we handle it here instead of
+ // in emitExternalDecls
+ if (Sym->isTable()) {
+ getTargetStreamer()->emitTableType(Sym);
+ return;
+ }
+
emitVisibility(Sym, GV->getVisibility(), !GV->isDeclaration());
if (GV->hasInitializer()) {
assert(getSymbolPreferLocal(*GV) == Sym);
@@ -315,8 +320,9 @@ void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) {
// will discard it later if it turns out not to be necessary.
auto Signature = signatureFromMVTs(Results, Params);
bool InvokeDetected = false;
- auto *Sym = getMCSymbolForFunction(&F, WasmEnableEmEH || WasmEnableEmSjLj,
- Signature.get(), InvokeDetected);
+ auto *Sym = getMCSymbolForFunction(
+ &F, WebAssembly::WasmEnableEmEH || WebAssembly::WasmEnableEmSjLj,
+ Signature.get(), InvokeDetected);
// Multiple functions can be mapped to the same invoke symbol. For
// example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32'
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 7832f199a2cc..17e867e4c7d8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -1741,7 +1741,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
void WebAssemblyCFGStackify::cleanupFunctionData(MachineFunction &MF) {
if (FakeCallerBB)
- MF.DeleteMachineBasicBlock(FakeCallerBB);
+ MF.deleteMachineBasicBlock(FakeCallerBB);
AppendixBB = FakeCallerBB = nullptr;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 1fa0ea3867c7..a3a33f4a5b3a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -31,6 +31,7 @@ HANDLE_NODETYPE(SWIZZLE)
HANDLE_NODETYPE(VEC_SHL)
HANDLE_NODETYPE(VEC_SHR_S)
HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(NARROW_U)
HANDLE_NODETYPE(EXTEND_LOW_S)
HANDLE_NODETYPE(EXTEND_LOW_U)
HANDLE_NODETYPE(EXTEND_HIGH_S)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 0df8f3e0e09c..38ed4c73fb93 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -176,6 +176,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
+ setTargetDAGCombine(ISD::TRUNCATE);
+
// Support saturating add for i8x16 and i16x8
for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
for (auto T : {MVT::v16i8, MVT::v8i16})
@@ -644,8 +646,7 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
Register RegFuncref =
MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
MachineInstr *RefNull =
- BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref)
- .addImm(static_cast<int32_t>(WebAssembly::HeapType::Funcref));
+ BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
BB->insertAfter(Const0->getIterator(), RefNull);
MachineInstr *TableSet =
@@ -2610,6 +2611,114 @@ performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}
+// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
+static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
+ const SDLoc &DL, unsigned VectorWidth) {
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ unsigned Factor = VT.getSizeInBits() / VectorWidth;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements() / Factor);
+
+ // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
+ unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
+
+ // This is the index of the first element of the VectorWidth-bit chunk
+ // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1);
+
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getBuildVector(ResultVT, DL,
+ Vec->ops().slice(IdxVal, ElemsPerChunk));
+
+ SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
+}
+
+// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
+// is the expected destination value type after recursion. In is the initial
+// input. Note that the input should have enough leading zero bits to prevent
+// NARROW_U from saturating results.
+static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ EVT SrcVT = In.getValueType();
+
+ // No truncation required, we might get here due to recursive calls.
+ if (SrcVT == DstVT)
+ return In;
+
+ unsigned SrcSizeInBits = SrcVT.getSizeInBits();
+ unsigned NumElems = SrcVT.getVectorNumElements();
+ if (!isPowerOf2_32(NumElems))
+ return SDValue();
+ assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
+ assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
+
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
+
+ // Narrow to the largest type possible:
+ // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
+ EVT InVT = MVT::i16, OutVT = MVT::i8;
+ if (SrcVT.getScalarSizeInBits() > 16) {
+ InVT = MVT::i32;
+ OutVT = MVT::i16;
+ }
+ unsigned SubSizeInBits = SrcSizeInBits / 2;
+ InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
+ OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
+
+ // Split lower/upper subvectors.
+ SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
+ SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
+
+ // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
+ if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
+ Lo = DAG.getBitcast(InVT, Lo);
+ Hi = DAG.getBitcast(InVT, Hi);
+ SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
+ return DAG.getBitcast(DstVT, Res);
+ }
+
+ // Recursively narrow lower/upper subvectors, concat result and narrow again.
+ EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
+ Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
+ Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
+
+ PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
+ return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
+}
+
+static SDValue performTruncateCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ if (!InVT.isSimple())
+ return SDValue();
+
+ EVT OutVT = N->getValueType(0);
+ if (!OutVT.isVector())
+ return SDValue();
+
+ EVT OutSVT = OutVT.getVectorElementType();
+ EVT InSVT = InVT.getVectorElementType();
+ // Currently only cover truncate to v16i8 or v8i16.
+ if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
+ (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
+ return SDValue();
+
+ SDLoc DL(N);
+ APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
+ OutVT.getScalarSizeInBits());
+ In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
+ return truncateVectorWithNARROW(OutVT, In, DL, DAG);
+}
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -2626,5 +2735,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND:
case ISD::CONCAT_VECTORS:
return performVectorTruncZeroCombine(N, DCI);
+ case ISD::TRUNCATE:
+ return performTruncateCombine(N, DCI);
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index ee9247a8bef9..3fb0af1d47a0 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -202,11 +202,6 @@ def Signature : Operand<i32> {
let PrintMethod = "printWebAssemblySignatureOperand";
}
-let OperandType = "OPERAND_HEAPTYPE" in
-def HeapType : Operand<i32> {
- let PrintMethod = "printWebAssemblyHeapTypeOperand";
-}
-
let OperandType = "OPERAND_TYPEINDEX" in
def TypeIndex : Operand<i32>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
index ef9bd35d004a..76a88caafc47 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -11,13 +11,14 @@
///
//===----------------------------------------------------------------------===//
-multiclass REF_I<WebAssemblyRegClass rc, ValueType vt> {
- defm REF_NULL_#rc : I<(outs rc:$res), (ins HeapType:$heaptype),
- (outs), (ins HeapType:$heaptype),
- [],
- "ref.null\t$res, $heaptype",
- "ref.null\t$heaptype",
- 0xd0>,
+multiclass REF_I<WebAssemblyRegClass rc, ValueType vt, string ht> {
+ defm REF_NULL_#rc : I<(outs rc:$dst), (ins),
+ (outs), (ins),
+ [(set rc:$dst, (!cast<Intrinsic>("int_wasm_ref_null_" # ht)))],
+ "ref.null_" # ht # "$dst",
+ "ref.null_" # ht,
+ !cond(!eq(ht, "func") : 0xd070,
+ !eq(ht, "extern") : 0xd06f)>,
Requires<[HasReferenceTypes]>;
defm SELECT_#rc: I<(outs rc:$dst), (ins rc:$lhs, rc:$rhs, I32:$cond),
(outs), (ins),
@@ -28,8 +29,8 @@ multiclass REF_I<WebAssemblyRegClass rc, ValueType vt> {
Requires<[HasReferenceTypes]>;
}
-defm "" : REF_I<FUNCREF, funcref>;
-defm "" : REF_I<EXTERNREF, externref>;
+defm "" : REF_I<FUNCREF, funcref, "func">;
+defm "" : REF_I<EXTERNREF, externref, "extern">;
foreach rc = [FUNCREF, EXTERNREF] in {
def : Pat<(select (i32 (setne I32:$cond, 0)), rc:$lhs, rc:$rhs),
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 30b99c3a69a9..5bb12c7fbdc7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1278,6 +1278,14 @@ multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
defm "" : SIMDNarrow<I16x8, 101>;
defm "" : SIMDNarrow<I32x4, 133>;
+// WebAssemblyISD::NARROW_U
+def wasm_narrow_t : SDTypeProfile<1, 2, []>;
+def wasm_narrow_u : SDNode<"WebAssemblyISD::NARROW_U", wasm_narrow_t>;
+def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))),
+ (NARROW_U_I8x16 $left, $right)>;
+def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))),
+ (NARROW_U_I16x8 $left, $right)>;
+
// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
foreach t1 = AllVecs in
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
index e44c2073eaeb..1fd00bf1cbc8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
@@ -20,7 +20,7 @@ def WebAssemblyTableGet : SDNode<"WebAssemblyISD::TABLE_GET", WebAssemblyTableGe
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-multiclass TABLE<WebAssemblyRegClass rc> {
+multiclass TABLE<WebAssemblyRegClass rc, string suffix> {
let mayLoad = 1 in
defm TABLE_GET_#rc : I<(outs rc:$res), (ins table32_op:$table, I32:$i),
(outs), (ins table32_op:$table),
@@ -39,14 +39,14 @@ multiclass TABLE<WebAssemblyRegClass rc> {
defm TABLE_GROW_#rc : I<(outs I32:$sz), (ins table32_op:$table, rc:$val, I32:$n),
(outs), (ins table32_op:$table),
- [],
+ [(set I32:$sz, (!cast<Intrinsic>("int_wasm_table_grow_" # suffix) (WebAssemblyWrapper tglobaladdr:$table), rc:$val, I32:$n))],
"table.grow\t$sz, $table, $val, $n",
"table.grow\t$table",
0xfc0f>;
defm TABLE_FILL_#rc : I<(outs), (ins table32_op:$table, I32:$i, rc:$val, I32:$n),
(outs), (ins table32_op:$table),
- [],
+ [(!cast<Intrinsic>("int_wasm_table_fill_" # suffix) (WebAssemblyWrapper tglobaladdr:$table), I32:$i, rc:$val, I32:$n)],
"table.fill\t$table, $i, $val, $n",
"table.fill\t$table",
0xfc11>;
@@ -62,8 +62,8 @@ multiclass TABLE<WebAssemblyRegClass rc> {
}
}
-defm "" : TABLE<FUNCREF>, Requires<[HasReferenceTypes]>;
-defm "" : TABLE<EXTERNREF>, Requires<[HasReferenceTypes]>;
+defm "" : TABLE<FUNCREF, "funcref">, Requires<[HasReferenceTypes]>;
+defm "" : TABLE<EXTERNREF, "externref">, Requires<[HasReferenceTypes]>;
def : Pat<(WebAssemblyTableSet mcsym:$table, i32:$idx, funcref:$r),
(TABLE_SET_FUNCREF mcsym:$table, i32:$idx, funcref:$r)>,
@@ -71,7 +71,7 @@ def : Pat<(WebAssemblyTableSet mcsym:$table, i32:$idx, funcref:$r),
defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table),
(outs), (ins table32_op:$table),
- [],
+ [(set I32:$sz, (int_wasm_table_size (WebAssemblyWrapper tglobaladdr:$table)))],
"table.size\t$sz, $table",
"table.size\t$table",
0xfc10>,
@@ -80,7 +80,9 @@ defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table),
defm TABLE_COPY : I<(outs), (ins table32_op:$table1, table32_op:$table2, I32:$d, I32:$s, I32:$n),
(outs), (ins table32_op:$table1, table32_op:$table2),
- [],
+ [(int_wasm_table_copy (WebAssemblyWrapper tglobaladdr:$table1),
+ (WebAssemblyWrapper tglobaladdr:$table2),
+ I32:$d, I32:$s, I32:$n)],
"table.copy\t$table1, $table2, $d, $s, $n",
"table.copy\t$table1, $table2",
0xfc0e>,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 4eacc921b6cd..23aaa5160abd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -267,6 +267,7 @@
///
///===----------------------------------------------------------------------===//
+#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
@@ -285,13 +286,6 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-lower-em-ehsjlj"
-// Emscripten's asm.js-style exception handling
-extern cl::opt<bool> WasmEnableEmEH;
-// Emscripten's asm.js-style setjmp/longjmp handling
-extern cl::opt<bool> WasmEnableEmSjLj;
-// Wasm setjmp/longjmp handling using wasm EH instructions
-extern cl::opt<bool> WasmEnableSjLj;
-
static cl::list<std::string>
EHAllowlist("emscripten-cxx-exceptions-allowed",
cl::desc("The list of function names in which Emscripten-style "
@@ -370,8 +364,9 @@ public:
static char ID;
WebAssemblyLowerEmscriptenEHSjLj()
- : ModulePass(ID), EnableEmEH(WasmEnableEmEH),
- EnableEmSjLj(WasmEnableEmSjLj), EnableWasmSjLj(WasmEnableSjLj) {
+ : ModulePass(ID), EnableEmEH(WebAssembly::WasmEnableEmEH),
+ EnableEmSjLj(WebAssembly::WasmEnableEmSjLj),
+ EnableWasmSjLj(WebAssembly::WasmEnableSjLj) {
assert(!(EnableEmSjLj && EnableWasmSjLj) &&
"Two SjLj modes cannot be turned on at the same time");
assert(!(EnableEmEH && EnableWasmSjLj) &&
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 0b953a90aeab..09bccef17ab0 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -40,9 +40,6 @@ cl::opt<bool>
" instruction output for test purposes only."),
cl::init(false));
-extern cl::opt<bool> WasmEnableEmEH;
-extern cl::opt<bool> WasmEnableEmSjLj;
-
static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
MCSymbol *
@@ -66,9 +63,11 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
// they reach this point as aggregate Array types with an element type
// that is a reference type.
wasm::ValType Type;
+ bool IsTable = false;
if (GlobalVT->isArrayTy() &&
WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
MVT VT;
+ IsTable = true;
switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
VT = MVT::funcref;
@@ -85,9 +84,14 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
} else
report_fatal_error("Aggregate globals not yet implemented");
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- WasmSym->setGlobalType(
- wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
+ if (IsTable) {
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
+ WasmSym->setTableType(Type);
+ } else {
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ WasmSym->setGlobalType(
+ wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
+ }
}
return WasmSym;
}
@@ -105,7 +109,8 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
bool InvokeDetected = false;
auto *WasmSym = Printer.getMCSymbolForFunction(
- F, WasmEnableEmEH || WasmEnableEmSjLj, Signature.get(), InvokeDetected);
+ F, WebAssembly::WasmEnableEmEH || WebAssembly::WasmEnableEmSjLj,
+ Signature.get(), InvokeDetected);
WasmSym->setSignature(Signature.get());
Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
@@ -275,11 +280,6 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
SmallVector<wasm::ValType, 4>());
break;
}
- } else if (Info.OperandType == WebAssembly::OPERAND_HEAPTYPE) {
- assert(static_cast<WebAssembly::HeapType>(MO.getImm()) !=
- WebAssembly::HeapType::Invalid);
- // With typed function references, this will need a case for type
- // index operands. Otherwise, fall through.
}
}
MCOp = MCOperand::createImm(MO.getImm());
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 7b70d99b5f52..482837178f3d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "WebAssemblyTargetMachine.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
+#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyTargetObjectFile.h"
@@ -24,6 +25,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
@@ -33,28 +35,6 @@ using namespace llvm;
#define DEBUG_TYPE "wasm"
-// Emscripten's asm.js-style exception handling
-cl::opt<bool>
- WasmEnableEmEH("enable-emscripten-cxx-exceptions",
- cl::desc("WebAssembly Emscripten-style exception handling"),
- cl::init(false));
-
-// Emscripten's asm.js-style setjmp/longjmp handling
-cl::opt<bool> WasmEnableEmSjLj(
- "enable-emscripten-sjlj",
- cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
- cl::init(false));
-
-// Exception handling using wasm EH instructions
-cl::opt<bool> WasmEnableEH("wasm-enable-eh",
- cl::desc("WebAssembly exception handling"),
- cl::init(false));
-
-// setjmp/longjmp handling using wasm EH instrutions
-cl::opt<bool> WasmEnableSjLj("wasm-enable-sjlj",
- cl::desc("WebAssembly setjmp/longjmp handling"),
- cl::init(false));
-
// A command-line option to keep implicit locals
// for the purpose of testing with lit/llc ONLY.
// This produces output which is not valid WebAssembly, and is not supported
@@ -368,7 +348,23 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
-static void basicCheckForEHAndSjLj(const TargetMachine *TM) {
+using WebAssembly::WasmEnableEH;
+using WebAssembly::WasmEnableEmEH;
+using WebAssembly::WasmEnableEmSjLj;
+using WebAssembly::WasmEnableSjLj;
+
+static void basicCheckForEHAndSjLj(TargetMachine *TM) {
+ // Before checking, we make sure TargetOptions.ExceptionModel is the same as
+ // MCAsmInfo.ExceptionsType. Normally these have to be the same, because clang
+ // stores the exception model info in LangOptions, which is later transferred
+ // to TargetOptions and MCAsmInfo. But when clang compiles bitcode directly,
+ // clang's LangOptions is not used and thus the exception model info is not
+ // correctly transferred to TargetOptions and MCAsmInfo, so we make sure we
+ // have the correct exception model in WebAssemblyMCAsmInfo constructor.
+ // But in this case TargetOptions is still not updated, so we make sure they
+ // are the same.
+ TM->Options.ExceptionModel = TM->getMCAsmInfo()->getExceptionHandlingType();
+
// Basic Correctness checking related to -exception-model
if (TM->Options.ExceptionModel != ExceptionHandling::None &&
TM->Options.ExceptionModel != ExceptionHandling::Wasm)
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8ce6b47d10e8..2ba0b97229cc 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1759,7 +1759,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm(
// registers in a memory expression, and though inaccessible via rip/eip.
if (IsGlobalLV && (BaseReg || IndexReg)) {
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
- End, Size, Identifier, Decl));
+ End, Size, Identifier, Decl,
+ FrontendSize));
return false;
}
// Otherwise, we set the base register to a non-zero value
@@ -2551,8 +2552,6 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
StringRef ErrMsg;
unsigned BaseReg = SM.getBaseReg();
unsigned IndexReg = SM.getIndexReg();
- if (IndexReg && BaseReg == X86::RIP)
- BaseReg = 0;
unsigned Scale = SM.getScale();
if (!PtrInOperand)
Size = SM.getElementSize() << 3;
@@ -4430,8 +4429,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- unsigned NumSuccessfulMatches =
- std::count(std::begin(Match), std::end(Match), Match_Success);
+ unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
if (NumSuccessfulMatches == 1) {
if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
return true;
@@ -4479,7 +4477,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// If all of the instructions reported an invalid mnemonic, then the original
// mnemonic was invalid.
- if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
+ if (llvm::count(Match, Match_MnemonicFail) == 4) {
if (OriginalError == Match_MnemonicFail)
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
Op.getLocRange(), MatchingInlineAsm);
@@ -4508,16 +4506,14 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
}
// If one instruction matched as unsupported, report this as unsupported.
- if (std::count(std::begin(Match), std::end(Match),
- Match_Unsupported) == 1) {
+ if (llvm::count(Match, Match_Unsupported) == 1) {
return Error(IDLoc, "unsupported instruction", EmptyRange,
MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
- if (std::count(std::begin(Match), std::end(Match),
- Match_MissingFeature) == 1) {
+ if (llvm::count(Match, Match_MissingFeature) == 1) {
ErrorInfo = Match_MissingFeature;
return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
MatchingInlineAsm);
@@ -4525,8 +4521,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// If one instruction matched with an invalid operand, report this as an
// operand failure.
- if (std::count(std::begin(Match), std::end(Match),
- Match_InvalidOperand) == 1) {
+ if (llvm::count(Match, Match_InvalidOperand) == 1) {
return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
@@ -4674,8 +4669,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
Op.getLocRange(), MatchingInlineAsm);
}
- unsigned NumSuccessfulMatches =
- std::count(std::begin(Match), std::end(Match), Match_Success);
+ unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
// If matching was ambiguous and we had size information from the frontend,
// try again with that. This handles cases like "movzx eax, m8/m16".
@@ -4721,16 +4715,14 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
}
// If one instruction matched as unsupported, report this as unsupported.
- if (std::count(std::begin(Match), std::end(Match),
- Match_Unsupported) == 1) {
+ if (llvm::count(Match, Match_Unsupported) == 1) {
return Error(IDLoc, "unsupported instruction", EmptyRange,
MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
- if (std::count(std::begin(Match), std::end(Match),
- Match_MissingFeature) == 1) {
+ if (llvm::count(Match, Match_MissingFeature) == 1) {
ErrorInfo = Match_MissingFeature;
return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
MatchingInlineAsm);
@@ -4738,14 +4730,12 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// If one instruction matched with an invalid operand, report this as an
// operand failure.
- if (std::count(std::begin(Match), std::end(Match),
- Match_InvalidOperand) == 1) {
+ if (llvm::count(Match, Match_InvalidOperand) == 1) {
return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
- if (std::count(std::begin(Match), std::end(Match),
- Match_InvalidImmUnsignedi4) == 1) {
+ if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 9164c699b569..67b1244708a8 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -285,6 +285,12 @@ struct X86Operand final : public MCParsedAsmOperand {
bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; }
+ bool isMemPlaceholder(const MCInstrDesc &Desc) const override {
+ // Only MS InlineAsm uses global variables with registers rather than
+ // rip/eip.
+ return isMem() && !Mem.DefaultBaseReg && Mem.FrontendSize;
+ }
+
bool needAddressOf() const override { return AddressOf; }
bool isMem() const override { return Kind == Memory; }
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index b51011e2c52f..a903c5f455a2 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -948,39 +948,39 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
CASE_UNPCK(PUNPCKHBW, r)
- case X86::MMX_PUNPCKHBWirr:
+ case X86::MMX_PUNPCKHBWrr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
LLVM_FALLTHROUGH;
CASE_UNPCK(PUNPCKHBW, m)
- case X86::MMX_PUNPCKHBWirm:
+ case X86::MMX_PUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask);
break;
CASE_UNPCK(PUNPCKHWD, r)
- case X86::MMX_PUNPCKHWDirr:
+ case X86::MMX_PUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
LLVM_FALLTHROUGH;
CASE_UNPCK(PUNPCKHWD, m)
- case X86::MMX_PUNPCKHWDirm:
+ case X86::MMX_PUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask);
break;
CASE_UNPCK(PUNPCKHDQ, r)
- case X86::MMX_PUNPCKHDQirr:
+ case X86::MMX_PUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
LLVM_FALLTHROUGH;
CASE_UNPCK(PUNPCKHDQ, m)
- case X86::MMX_PUNPCKHDQirm:
+ case X86::MMX_PUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
@@ -998,39 +998,39 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
CASE_UNPCK(PUNPCKLBW, r)
- case X86::MMX_PUNPCKLBWirr:
+ case X86::MMX_PUNPCKLBWrr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
LLVM_FALLTHROUGH;
CASE_UNPCK(PUNPCKLBW, m)
- case X86::MMX_PUNPCKLBWirm:
+ case X86::MMX_PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask);
break;
CASE_UNPCK(PUNPCKLWD, r)
- case X86::MMX_PUNPCKLWDirr:
+ case X86::MMX_PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
LLVM_FALLTHROUGH;
CASE_UNPCK(PUNPCKLWD, m)
- case X86::MMX_PUNPCKLWDirm:
+ case X86::MMX_PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask);
break;
CASE_UNPCK(PUNPCKLDQ, r)
- case X86::MMX_PUNPCKLDQirr:
+ case X86::MMX_PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
LLVM_FALLTHROUGH;
CASE_UNPCK(PUNPCKLDQ, m)
- case X86::MMX_PUNPCKLDQirm:
+ case X86::MMX_PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index 11251fb2b2ba..bf3f4e990ecc 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -236,7 +236,7 @@ bool X86WinCOFFTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) {
bool X86WinCOFFTargetStreamer::emitFPOStackAlign(unsigned Align, SMLoc L) {
if (checkInFPOPrologue(L))
return true;
- if (!llvm::any_of(CurFPOData->Instructions, [](const FPOInstruction &Inst) {
+ if (llvm::none_of(CurFPOData->Instructions, [](const FPOInstruction &Inst) {
return Inst.Op == FPOInstruction::SetFrame;
})) {
getContext().reportError(
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 2e08482e4ff6..d48b8e458219 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -754,8 +754,6 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) {
void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
- emitAsanMemaccessSymbols(M);
-
if (TT.isOSBinFormatMachO()) {
// Mach-O uses non-lazy symbol stubs to encode per-TU information into
// global table for symbol lookup.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h
index 3b0983a7d935..b22f25af26cf 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -31,6 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
FaultMaps FM;
std::unique_ptr<MCCodeEmitter> CodeEmitter;
bool EmitFPOData = false;
+ bool ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = false;
// This utility class tracks the length of a stackmap instruction's 'shadow'.
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
@@ -100,20 +101,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// Address sanitizer specific lowering for X86.
void LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI);
- void emitAsanMemaccessSymbols(Module &M);
- void emitAsanMemaccessPartial(Module &M, unsigned Reg,
- const ASanAccessInfo &AccessInfo,
- MCSubtargetInfo &STI);
- void emitAsanMemaccessFull(Module &M, unsigned Reg,
- const ASanAccessInfo &AccessInfo,
- MCSubtargetInfo &STI);
- void emitAsanReportError(Module &M, unsigned Reg,
- const ASanAccessInfo &AccessInfo,
- MCSubtargetInfo &STI);
-
- typedef std::tuple<unsigned /*Reg*/, uint32_t /*AccessInfo*/>
- AsanMemaccessTuple;
- std::map<AsanMemaccessTuple, MCSymbol *> AsanMemaccessSymbols;
// Choose between emitting .seh_ directives and .cv_fpo_ directives.
void EmitSEHInstruction(const MachineInstr *MI);
@@ -165,6 +152,10 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void emitFunctionBodyStart() override;
void emitFunctionBodyEnd() override;
+
+ bool shouldEmitWeakSwiftAsyncExtendedFramePointerFlags() const override {
+ return ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags;
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 863438793acf..96d3d1390a59 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -186,7 +186,7 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
for (auto &Group : AllCmovGroups) {
// Skip any group that doesn't do at least one memory operand cmov.
- if (!llvm::any_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
+ if (llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
continue;
// For CMOV groups which we can rewrite and which contain a memory load,
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 93bc23006dc4..6a047838f0b5 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -191,8 +191,6 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
// Expand CALL_RVMARKER pseudo to call instruction, followed by the special
//"movq %rax, %rdi" marker.
- // TODO: Mark the sequence as bundle, to avoid passes moving other code
- // in between.
MachineInstr &MI = *MBBI;
MachineInstr *OriginalCall;
@@ -236,15 +234,23 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
// Emit call to ObjC runtime.
const uint32_t *RegMask =
TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
- .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
- .addRegMask(RegMask)
- .addReg(X86::RAX,
- RegState::Implicit |
- (RAXImplicitDead ? (RegState::Dead | RegState::Define)
- : RegState::Define))
- .getInstr();
+ MachineInstr *RtCall =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
+ .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
+ .addRegMask(RegMask)
+ .addReg(X86::RAX,
+ RegState::Implicit |
+ (RAXImplicitDead ? (RegState::Dead | RegState::Define)
+ : RegState::Define))
+ .getInstr();
MI.eraseFromParent();
+
+ auto &TM = MBB.getParent()->getTarget();
+ // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
+ // later optimizations from breaking up the sequence.
+ if (TM.getTargetTriple().isOSDarwin())
+ finalizeBundle(MBB, OriginalCall->getIterator(),
+ std::next(RtCall->getIterator()));
}
/// If \p MBBI is a pseudo instruction, this method expands
diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 87c04a07cd13..47874e82ff3b 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -134,11 +134,7 @@ bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) {
if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr())
return false;
- for (MachineOperand &MO : MI.operands())
- if (isTilePhysReg(MO))
- return true;
-
- return false;
+ return llvm::any_of(MI.operands(), isTilePhysReg);
}
MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) {
diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index e1d4b4c34772..16bff201dd03 100644
--- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -457,14 +457,12 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
OptForSize = MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
- for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
- MachineInstr *MI = &*I;
-
- if (MachineInstr *NewMI = tryReplaceInstr(MI, MBB))
- MIReplacements.push_back(std::make_pair(MI, NewMI));
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MachineInstr *NewMI = tryReplaceInstr(&MI, MBB))
+ MIReplacements.push_back(std::make_pair(&MI, NewMI));
// We're done with this instruction, update liveness for the next one.
- LiveRegs.stepBackward(*MI);
+ LiveRegs.stepBackward(MI);
}
while (!MIReplacements.empty()) {
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 4d9160f35226..2f0ab4ca9de4 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -1442,7 +1442,7 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
assert(UpdatedSlot < StackTop && Dest < 7);
Stack[UpdatedSlot] = Dest;
RegMap[Dest] = UpdatedSlot;
- MBB->getParent()->DeleteMachineInstr(&MI); // Remove the old instruction
+ MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
}
/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index c29ae9f6af4c..0a7aea467809 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2496,8 +2496,8 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
}
// Assign slots for GPRs. It increases frame size.
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i - 1].getReg();
+ for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ unsigned Reg = I.getReg();
if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
@@ -2506,15 +2506,15 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
CalleeSavedFrameSize += SlotSize;
int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
- CSI[i - 1].setFrameIdx(SlotIndex);
+ I.setFrameIdx(SlotIndex);
}
X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
// Assign slots for XMMs.
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i - 1].getReg();
+ for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ unsigned Reg = I.getReg();
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
@@ -2533,7 +2533,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// spill into slot
SpillSlotOffset -= Size;
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
- CSI[i - 1].setFrameIdx(SlotIndex);
+ I.setFrameIdx(SlotIndex);
MFI.ensureMaxAlignment(Alignment);
// Save the start offset and size of XMM in stack frame for funclets.
@@ -2559,8 +2559,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
// Push GPRs. It increases frame size.
const MachineFunction &MF = *MBB.getParent();
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i - 1].getReg();
+ for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ unsigned Reg = I.getReg();
if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
@@ -2593,8 +2593,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
// Make XMM regs spilled. X86 does not have ability of push/pop XMM.
// It can be done by spilling XMMs to stack frame.
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
+ for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ unsigned Reg = I.getReg();
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
@@ -2607,8 +2607,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(Reg);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
- TRI);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI);
--MI;
MI->setFlag(MachineInstr::FrameSetup);
++MI;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 62b2387396be..6f6361b6757b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1091,17 +1091,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
+ if (VT == MVT::v2i64) continue;
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
- setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
- setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
-
- // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
- // shifts) is better.
- if (!Subtarget.useAVX512Regs() &&
- !(Subtarget.hasBWI() && Subtarget.hasVLX()))
- setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
-
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
@@ -1199,8 +1193,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
+ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ }
// XOP can efficiently perform BITREVERSE with VPPERM.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
@@ -1283,6 +1279,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
+ if (VT == MVT::v4i64) continue;
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
// These types need custom splitting if their input is a 128-bit vector.
@@ -1291,13 +1290,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
- setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
-
- // With BWI, expanding (and promoting the shifts) is the better.
- if (!Subtarget.useBWIRegs())
- setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
-
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
@@ -1662,6 +1654,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
@@ -1676,16 +1670,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
- // With BWI, expanding (and promoting the shifts) is the better.
- if (!Subtarget.useBWIRegs())
- setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
-
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
@@ -5926,8 +5914,7 @@ static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
/// from position Pos and ending in Pos+Size is undef or is zero.
static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size) {
- return llvm::all_of(Mask.slice(Pos, Size),
- [](int M) { return isUndefOrZero(M); });
+ return llvm::all_of(Mask.slice(Pos, Size), isUndefOrZero);
}
/// Helper function to test whether a shuffle mask could be
@@ -6788,12 +6775,33 @@ void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
}
}
+// Attempt to constant fold, else just create a VECTOR_SHUFFLE.
+static SDValue getVectorShuffle(SelectionDAG &DAG, EVT VT, const SDLoc &dl,
+ SDValue V1, SDValue V2, ArrayRef<int> Mask) {
+ if ((ISD::isBuildVectorOfConstantSDNodes(V1.getNode()) || V1.isUndef()) &&
+ (ISD::isBuildVectorOfConstantSDNodes(V2.getNode()) || V2.isUndef())) {
+ SmallVector<SDValue> Ops(Mask.size(), DAG.getUNDEF(VT.getScalarType()));
+ for (int I = 0, NumElts = Mask.size(); I != NumElts; ++I) {
+ int M = Mask[I];
+ if (M < 0)
+ continue;
+ SDValue V = (M < NumElts) ? V1 : V2;
+ if (V.isUndef())
+ continue;
+ Ops[I] = V.getOperand(M % NumElts);
+ }
+ return DAG.getBuildVector(VT, dl, Ops);
+ }
+
+ return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
+}
+
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
- return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
+ return getVectorShuffle(DAG, VT, dl, V1, V2, Mask);
}
/// Returns a vector_shuffle node for an unpackh operation.
@@ -6801,12 +6809,11 @@ static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
- return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
+ return getVectorShuffle(DAG, VT, dl, V1, V2, Mask);
}
/// Returns a node that packs the LHS + RHS nodes together at half width.
/// May return X86ISD::PACKSS/PACKUS, packing the top/bottom half.
-/// TODO: Add vXi64 -> vXi32 pack support with vector_shuffle node.
/// TODO: Add subvector splitting if/when we have a need for it.
static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS,
@@ -6818,9 +6825,24 @@ static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
VT.getSizeInBits() == OpVT.getSizeInBits() &&
(EltSizeInBits * 2) == OpVT.getScalarSizeInBits() &&
"Unexpected PACK operand types");
- assert((EltSizeInBits == 8 || EltSizeInBits == 16) &&
+ assert((EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) &&
"Unexpected PACK result type");
+ // Rely on vector shuffles for vXi64 -> vXi32 packing.
+ if (EltSizeInBits == 32) {
+ SmallVector<int> PackMask;
+ int Offset = PackHiHalf ? 1 : 0;
+ int NumElts = VT.getVectorNumElements();
+ for (int I = 0; I != NumElts; I += 4) {
+ PackMask.push_back(I + Offset);
+ PackMask.push_back(I + Offset + 2);
+ PackMask.push_back(I + Offset + NumElts);
+ PackMask.push_back(I + Offset + NumElts + 2);
+ }
+ return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, LHS),
+ DAG.getBitcast(VT, RHS), PackMask);
+ }
+
// See if we already have sufficient leading bits for PACKSS/PACKUS.
if (!PackHiHalf) {
if (UsePackUS &&
@@ -15192,12 +15214,10 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
// need
// to balance this to ensure we don't form a 3-1 shuffle in the other
// half.
- int NumFlippedAToBInputs =
- std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
- std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
- int NumFlippedBToBInputs =
- std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
- std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
+ int NumFlippedAToBInputs = llvm::count(AToBInputs, 2 * ADWord) +
+ llvm::count(AToBInputs, 2 * ADWord + 1);
+ int NumFlippedBToBInputs = llvm::count(BToBInputs, 2 * BDWord) +
+ llvm::count(BToBInputs, 2 * BDWord + 1);
if ((NumFlippedAToBInputs == 1 &&
(NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
(NumFlippedBToBInputs == 1 &&
@@ -25599,6 +25619,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
/// Handle vector element shifts where the shift amount may or may not be a
/// constant. Takes immediate version of shift as input.
+/// TODO: Replace with vector + (splat) idx to avoid extract_element nodes.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
const X86Subtarget &Subtarget,
@@ -25606,11 +25627,6 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
- // Catch shift-by-constant.
- if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
- return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
- CShAmt->getZExtValue(), DAG);
-
// Change opcode to non-immediate version.
Opc = getTargetVShiftUniformOpcode(Opc, true);
@@ -26342,10 +26358,19 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32,
DAG.getBitcast(MVT::i16, Ins));
}
- case VSHIFT:
+ case VSHIFT: {
+ SDValue SrcOp = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+
+ // Catch shift-by-constant.
+ if (auto *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
+ return getTargetVShiftByConstNode(IntrData->Opc0, dl,
+ Op.getSimpleValueType(), SrcOp,
+ CShAmt->getZExtValue(), DAG);
+
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
- Op.getOperand(1), Op.getOperand(2), Subtarget,
- DAG);
+ SrcOp, ShAmt, Subtarget, DAG);
+ }
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
@@ -26638,7 +26663,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
X86CC = X86::COND_E;
break;
}
- SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
+ SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2);
SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG);
@@ -26653,7 +26678,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
Opcode = X86ISD::PCMPESTR;
- SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
+ SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
@@ -26666,7 +26691,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
Opcode = X86ISD::PCMPESTR;
- SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
+ SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1);
}
@@ -28892,10 +28917,13 @@ SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
// supported by the Subtarget
static bool supportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
+ if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))
+ return false;
+
if (VT.getScalarSizeInBits() < 16)
return false;
- if (VT.is512BitVector() && Subtarget.hasAVX512() &&
+ if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
@@ -28919,6 +28947,8 @@ bool supportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
// natively supported by the Subtarget
static bool supportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
+ if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))
+ return false;
if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
return false;
@@ -28927,7 +28957,8 @@ static bool supportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
- if (Subtarget.hasAVX512())
+ if (Subtarget.hasAVX512() &&
+ (Subtarget.useAVX512Regs() || !VT.is512BitVector()))
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
@@ -28935,8 +28966,8 @@ static bool supportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
return (Opcode == ISD::SRA) ? AShift : LShift;
}
-static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
@@ -29066,8 +29097,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
@@ -29166,28 +29197,20 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
(Subtarget.hasBWI() && VT == MVT::v64i8)))
return SDValue();
- if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
- SmallVector<SDValue, 8> Elts;
- MVT SVT = VT.getVectorElementType();
- unsigned SVTBits = SVT.getSizeInBits();
- APInt One(SVTBits, 1);
- unsigned NumElems = VT.getVectorNumElements();
-
- for (unsigned i = 0; i != NumElems; ++i) {
- SDValue Op = Amt->getOperand(i);
- if (Op->isUndef()) {
- Elts.push_back(Op);
- continue;
- }
+ MVT SVT = VT.getVectorElementType();
+ unsigned SVTBits = SVT.getSizeInBits();
+ unsigned NumElems = VT.getVectorNumElements();
- ConstantSDNode *ND = cast<ConstantSDNode>(Op);
- APInt C(SVTBits, ND->getZExtValue());
- uint64_t ShAmt = C.getZExtValue();
- if (ShAmt >= SVTBits) {
- Elts.push_back(DAG.getUNDEF(SVT));
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (getTargetConstantBitsFromNode(Amt, SVTBits, UndefElts, EltBits)) {
+ APInt One(SVTBits, 1);
+ SmallVector<SDValue> Elts(NumElems, DAG.getUNDEF(SVT));
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (UndefElts[I] || EltBits[I].uge(SVTBits))
continue;
- }
- Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT));
+ uint64_t ShAmt = EltBits[I].getZExtValue();
+ Elts[I] = DAG.getConstant(One.shl(ShAmt), dl, SVT);
}
return DAG.getBuildVector(VT, dl, Elts);
}
@@ -29233,10 +29256,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
- if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
+ if (SDValue V = LowerShiftByScalarImmediate(Op, DAG, Subtarget))
return V;
- if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
+ if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget))
return V;
if (supportedVectorVarShift(VT, Subtarget, Opc))
@@ -29818,14 +29841,29 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
}
- assert(IsROTL && "Only ROTL supported");
+ SDValue Z = DAG.getConstant(0, DL, VT);
+
+ if (!IsROTL) {
+ // If the ISD::ROTR amount is constant, we're always better converting to
+ // ISD::ROTL.
+ if (SDValue NegAmt = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Z, Amt}))
+ return DAG.getNode(ISD::ROTL, DL, VT, R, NegAmt);
+
+ // XOP targets always prefers ISD::ROTL.
+ if (Subtarget.hasXOP())
+ return DAG.getNode(ISD::ROTL, DL, VT, R,
+ DAG.getNode(ISD::SUB, DL, VT, Z, Amt));
+ }
+
+ // Split 256-bit integers on XOP/pre-AVX2 targets.
+ if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2()))
+ return splitVectorIntBinary(Op, DAG);
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
// XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) {
- if (VT.is256BitVector())
- return splitVectorIntBinary(Op, DAG);
+ assert(IsROTL && "Only ROTL expected");
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
@@ -29839,55 +29877,89 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
- // Split 256-bit integers on pre-AVX2 targets.
- if (VT.is256BitVector() && !Subtarget.hasAVX2())
- return splitVectorIntBinary(Op, DAG);
-
- assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
- ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8 ||
- VT == MVT::v32i16) &&
- Subtarget.hasAVX2())) &&
- "Only vXi32/vXi16/vXi8 vector rotates supported");
-
// Rotate by an uniform constant - expand back to shifts.
if (IsCstSplat)
return SDValue();
- bool IsSplatAmt = DAG.isSplatValue(Amt);
- SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
+ // Split 512-bit integers on non 512-bit BWI targets.
+ if (VT.is512BitVector() && !Subtarget.useBWIRegs())
+ return splitVectorIntBinary(Op, DAG);
- // v16i8/v32i8: Split rotation into rot4/rot2/rot1 stages and select by
- // the amount bit.
- if (EltSizeInBits == 8) {
- if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
- return SDValue();
+ assert(
+ (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
+ ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
+ Subtarget.hasAVX2()) ||
+ ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) &&
+ "Only vXi32/vXi16/vXi8 vector rotates supported");
- // Check for a hidden ISD::ROTR, vXi8 lowering can handle both, but we
- // currently hit infinite loops in legalization if we allow ISD::ROTR.
- // FIXME: Infinite ROTL<->ROTR legalization in TargetLowering::expandROT.
- SDValue HiddenROTRAmt;
- if (Amt.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(Amt.getOperand(0).getNode()))
- HiddenROTRAmt = Amt.getOperand(1);
+ MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
+ MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2);
- MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
+ SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
- // If the amount is a splat, attempt to fold as unpack(x,x) << zext(y):
- // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw.
- // rotr(x,y) -> (((aext(x) << bw) | zext(x)) >> (y & (bw-1))).
- if (SDValue BaseRotAmt = DAG.getSplatValue(DAG.getNode(
- ISD::AND, DL, VT, HiddenROTRAmt ? HiddenROTRAmt : Amt, AmtMask))) {
- unsigned ShiftX86Opc = HiddenROTRAmt ? X86ISD::VSRLI : X86ISD::VSHLI;
- BaseRotAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseRotAmt);
+ // Attempt to fold as unpack(x,x) << zext(splat(y)):
+ // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
+ // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
+ // TODO: Handle vXi16 cases.
+ if (EltSizeInBits == 8 || EltSizeInBits == 32) {
+ if (SDValue BaseRotAmt = DAG.getSplatValue(AmtMod)) {
+ unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
+ BaseRotAmt = DAG.getZExtOrTrunc(BaseRotAmt, DL, MVT::i32);
Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, BaseRotAmt,
Subtarget, DAG);
Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt,
Subtarget, DAG);
- return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !HiddenROTRAmt);
+ return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);
+ }
+ }
+
+ // v16i8/v32i8/v64i8: Split rotation into rot4/rot2/rot1 stages and select by
+ // the amount bit.
+ // TODO: We're doing nothing here that we couldn't do for funnel shifts.
+ if (EltSizeInBits == 8) {
+ bool IsConstAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
+ MVT WideVT =
+ MVT::getVectorVT(Subtarget.hasBWI() ? MVT::i16 : MVT::i32, NumElts);
+ unsigned ShiftOpc = IsROTL ? ISD::SHL : ISD::SRL;
+
+ // Attempt to fold as:
+ // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw.
+ // rotr(x,y) -> (((aext(x) << bw) | zext(x)) >> (y & (bw-1))).
+ if (supportedVectorVarShift(WideVT, Subtarget, ShiftOpc) &&
+ supportedVectorShiftWithImm(WideVT, Subtarget, ShiftOpc)) {
+ // If we're rotating by constant, just use default promotion.
+ if (IsConstAmt)
+ return SDValue();
+ // See if we can perform this by widening to vXi16 or vXi32.
+ R = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, R);
+ R = DAG.getNode(
+ ISD::OR, DL, WideVT, R,
+ getTargetVShiftByConstNode(X86ISD::VSHLI, DL, WideVT, R, 8, DAG));
+ Amt = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, AmtMod);
+ R = DAG.getNode(ShiftOpc, DL, WideVT, R, Amt);
+ if (IsROTL)
+ R = getTargetVShiftByConstNode(X86ISD::VSRLI, DL, WideVT, R, 8, DAG);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, R);
}
+ // Attempt to fold as unpack(x,x) << zext(y):
+ // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
+ // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
+ if (IsConstAmt || supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
+ // See if we can perform this by unpacking to lo/hi vXi16.
+ SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
+ SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
+ SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
+ SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
+ SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
+ return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);
+ }
+ assert((VT == MVT::v16i8 || VT == MVT::v32i8) && "Unsupported vXi8 type");
+
// We don't need ModuloAmt here as we just peek at individual bits.
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
@@ -29907,15 +29979,15 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getSelect(DL, SelVT, C, V0, V1);
};
- // 'Hidden' ROTR is currently only profitable on AVX512 targets where we
- // have VPTERNLOG.
- unsigned ShiftLHS = ISD::SHL;
- unsigned ShiftRHS = ISD::SRL;
- if (HiddenROTRAmt && useVPTERNLOG(Subtarget, VT)) {
- std::swap(ShiftLHS, ShiftRHS);
- Amt = HiddenROTRAmt;
+ // ISD::ROTR is currently only profitable on AVX512 targets with VPTERNLOG.
+ if (!IsROTL && !useVPTERNLOG(Subtarget, VT)) {
+ Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt);
+ IsROTL = true;
}
+ unsigned ShiftLHS = IsROTL ? ISD::SHL : ISD::SRL;
+ unsigned ShiftRHS = IsROTL ? ISD::SRL : ISD::SHL;
+
// Turn 'a' into a mask suitable for VSELECT: a = a << 5;
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
@@ -29952,18 +30024,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SignBitSelect(VT, Amt, M, R);
}
- // ISD::ROT* uses modulo rotate amounts.
- if (SDValue BaseRotAmt = DAG.getSplatValue(Amt)) {
- // If the amount is a splat, perform the modulo BEFORE the splat,
- // this helps LowerScalarVariableShift to remove the splat later.
- Amt = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, BaseRotAmt);
- Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
- Amt = DAG.getVectorShuffle(VT, DL, Amt, DAG.getUNDEF(VT),
- SmallVector<int>(NumElts, 0));
- } else {
- Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
- }
-
+ bool IsSplatAmt = DAG.isSplatValue(Amt);
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
supportedVectorVarShift(VT, Subtarget, ISD::SRL);
@@ -29971,13 +30032,25 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Fallback for splats + all supported variable shifts.
// Fallback for non-constants AVX2 vXi16 as well.
if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
+ SDValue SHL = DAG.getNode(IsROTL ? ISD::SHL : ISD::SRL, DL, VT, R, Amt);
+ SDValue SRL = DAG.getNode(IsROTL ? ISD::SRL : ISD::SHL, DL, VT, R, AmtR);
return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
}
+ // Everything below assumes ISD::ROTL.
+ if (!IsROTL) {
+ Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt);
+ IsROTL = true;
+ }
+
+ // ISD::ROT* uses modulo rotate amounts.
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
+
+ assert(IsROTL && "Only ROTL supported");
+
// As with shifts, attempt to convert the rotation amount to a multiplication
// factor, fallback to general expansion.
SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
@@ -32927,11 +33000,6 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
unsigned Bits = Ty->getScalarSizeInBits();
- // 8-bit shifts are always expensive, but versions with a scalar amount aren't
- // particularly cheaper than those without.
- if (Bits == 8)
- return false;
-
// XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts.
// Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred.
if (Subtarget.hasXOP() &&
@@ -36249,9 +36317,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
(V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
- : !Subtarget.hasSSE2() ? MVT::v4f32
- : MaskVT;
+ if (MaskEltSize == 16)
+ SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+ else
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
}
@@ -36300,9 +36369,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
- : !Subtarget.hasSSE2() ? MVT::v4f32
- : MaskVT;
+ if (MaskEltSize == 16)
+ SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+ else
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
@@ -40981,6 +41051,28 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth);
}
+bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) const {
+ unsigned NumElts = DemandedElts.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+
+ switch (Opc) {
+ case X86ISD::VBROADCAST:
+ case X86ISD::VBROADCAST_LOAD:
+ // TODO: Permit vXi64 types on 32-bit targets.
+ if (isTypeLegal(Op.getValueType().getVectorElementType())) {
+ UndefElts = APInt::getNullValue(NumElts);
+ return true;
+ }
+ return false;
+ }
+
+ return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,
+ Depth);
+}
+
// Helper to peek through bitops/trunc/setcc to determine size of source vector.
// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
@@ -46204,25 +46296,27 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is SSE1 only convert to FAND to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
- return DAG.getBitcast(
- MVT::v4i32, DAG.getNode(X86ISD::FAND, SDLoc(N), MVT::v4f32,
- DAG.getBitcast(MVT::v4f32, N->getOperand(0)),
- DAG.getBitcast(MVT::v4f32, N->getOperand(1))));
+ return DAG.getBitcast(MVT::v4i32,
+ DAG.getNode(X86ISD::FAND, dl, MVT::v4f32,
+ DAG.getBitcast(MVT::v4f32, N0),
+ DAG.getBitcast(MVT::v4f32, N1)));
}
// Use a 32-bit and+zext if upper bits known zero.
- if (VT == MVT::i64 && Subtarget.is64Bit() &&
- !isa<ConstantSDNode>(N->getOperand(1))) {
+ if (VT == MVT::i64 && Subtarget.is64Bit() && !isa<ConstantSDNode>(N1)) {
APInt HiMask = APInt::getHighBitsSet(64, 32);
- if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) ||
- DAG.MaskedValueIsZero(N->getOperand(0), HiMask)) {
- SDLoc dl(N);
- SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(0));
- SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(1));
+ if (DAG.MaskedValueIsZero(N1, HiMask) ||
+ DAG.MaskedValueIsZero(N0, HiMask)) {
+ SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N0);
+ SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N1);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64,
DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS));
}
@@ -46235,8 +46329,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SmallVector<APInt, 2> SrcPartials;
if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps, &SrcPartials) &&
SrcOps.size() == 1) {
- SDLoc dl(N);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
@@ -46276,33 +46368,57 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
return R;
- // Attempt to recursively combine a bitmask AND with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ // Attempt to recursively combine a bitmask AND with shuffles.
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
+
+ // If either operand is a constant mask, then only the elements that aren't
+ // zero are actually demanded by the other operand.
+ auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) {
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ int NumElts = VT.getVectorNumElements();
+ int EltSizeInBits = VT.getScalarSizeInBits();
+ if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
+ return false;
+
+ APInt DemandedElts = APInt::getZero(NumElts);
+ for (int I = 0; I != NumElts; ++I)
+ if (!EltBits[I].isZero())
+ DemandedElts.setBit(I);
+
+ APInt KnownUndef, KnownZero;
+ return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
+ KnownZero, DCI);
+ };
+ if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
}
// Attempt to combine a scalar bitmask AND with an extracted shuffle.
if ((VT.getScalarSizeInBits() % 8) == 0 &&
- N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(N->getOperand(0).getOperand(1))) {
- SDValue BitMask = N->getOperand(1);
- SDValue SrcVec = N->getOperand(0).getOperand(0);
+ N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue BitMask = N1;
+ SDValue SrcVec = N0.getOperand(0);
EVT SrcVecVT = SrcVec.getValueType();
// Check that the constant bitmask masks whole bytes.
APInt UndefElts;
SmallVector<APInt, 64> EltBits;
- if (VT == SrcVecVT.getScalarType() &&
- N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
+ if (VT == SrcVecVT.getScalarType() && N0->isOnlyUserOf(SrcVec.getNode()) &&
getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
llvm::all_of(EltBits, [](const APInt &M) {
return M.isZero() || M.isAllOnes();
})) {
unsigned NumElts = SrcVecVT.getVectorNumElements();
unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
- unsigned Idx = N->getOperand(0).getConstantOperandVal(1);
+ unsigned Idx = N0.getConstantOperandVal(1);
// Create a root shuffle mask from the byte mask and the extracted index.
SmallVector<int, 16> ShuffleMask(NumElts * Scale, SM_SentinelUndef);
@@ -46318,8 +46434,8 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
X86::MaxShuffleCombineDepth,
/*HasVarMask*/ false, /*AllowVarCrossLaneMask*/ true,
/*AllowVarPerLaneMask*/ true, DAG, Subtarget))
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
- N->getOperand(0).getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,
+ N0.getOperand(1));
}
}
@@ -46644,11 +46760,13 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is SSE1 only convert to FOR to avoid scalarization.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
return DAG.getBitcast(MVT::v4i32,
- DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
+ DAG.getNode(X86ISD::FOR, dl, MVT::v4f32,
DAG.getBitcast(MVT::v4f32, N0),
DAG.getBitcast(MVT::v4f32, N1)));
}
@@ -46660,8 +46778,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
SmallVector<APInt, 2> SrcPartials;
if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
SrcOps.size() == 1) {
- SDLoc dl(N);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
@@ -46707,7 +46823,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL &&
N1.getConstantOperandAPInt(1) == HalfElts &&
DAG.MaskedValueIsZero(N0, APInt(1, 1), UpperElts)) {
- SDLoc dl(N);
return DAG.getNode(
ISD::CONCAT_VECTORS, dl, VT,
extractSubVector(N0, 0, DAG, dl, HalfElts),
@@ -46716,7 +46831,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL &&
N0.getConstantOperandAPInt(1) == HalfElts &&
DAG.MaskedValueIsZero(N1, APInt(1, 1), UpperElts)) {
- SDLoc dl(N);
return DAG.getNode(
ISD::CONCAT_VECTORS, dl, VT,
extractSubVector(N1, 0, DAG, dl, HalfElts),
@@ -46724,11 +46838,36 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
}
- // Attempt to recursively combine an OR of shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ // Attempt to recursively combine an OR of shuffles.
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
+
+ // If either operand is a constant mask, then only the elements that aren't
+ // allones are actually demanded by the other operand.
+ auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) {
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ int NumElts = VT.getVectorNumElements();
+ int EltSizeInBits = VT.getScalarSizeInBits();
+ if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
+ return false;
+
+ APInt DemandedElts = APInt::getZero(NumElts);
+ for (int I = 0; I != NumElts; ++I)
+ if (!EltBits[I].isAllOnes())
+ DemandedElts.setBit(I);
+
+ APInt KnownUndef, KnownZero;
+ return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
+ KnownZero, DCI);
+ };
+ if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
}
// We should fold "masked merge" patterns when `andn` is not available.
@@ -52111,7 +52250,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
case X86ISD::VSHLI:
case X86ISD::VSRLI:
// Special case: SHL/SRL AVX1 V4i64 by 32-bits can lower as a shuffle.
- // TODO: Move this to LowerScalarImmediateShift?
+ // TODO: Move this to LowerShiftByScalarImmediate?
if (VT == MVT::v4i64 && !Subtarget.hasInt256() &&
llvm::all_of(Ops, [](SDValue Op) {
return Op.getConstantOperandAPInt(1) == 32;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 6805cb75f0f2..d1d6e319f16b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1100,6 +1100,12 @@ namespace llvm {
bool shouldSplatInsEltVarIndex(EVT VT) const override;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
+ // Converting to sat variants holds little benefit on X86 as we will just
+ // need to saturate the value back using fp arithmatic.
+ return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
+ }
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
@@ -1153,6 +1159,10 @@ namespace llvm {
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const override;
+ bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) const override;
+
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
SDValue unwrapAddress(SDValue N) const override;
diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 732b2b1a5ada..6642f46e64b2 100644
--- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -137,8 +137,10 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
Changed |= addENDBR(MBB, MBB.begin());
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
- if (I->isCall() && IsCallReturnTwice(I->getOperand(0)))
+ if (I->isCall() && I->getNumOperands() > 0 &&
+ IsCallReturnTwice(I->getOperand(0))) {
Changed |= addENDBR(MBB, std::next(I));
+ }
}
// Exception handle may indirectly jump to catch pad, So we should add
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1db83033ba35..ecd4777c3533 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9958,74 +9958,74 @@ multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
- WriteShuffle256, truncstorevi8,
+ WriteVPMOV256, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
- WriteShuffle256, truncstore_s_vi8,
+ WriteVPMOV256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
- WriteShuffle256, truncstore_us_vi8,
+ WriteVPMOV256, truncstore_us_vi8,
masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
- WriteShuffle256, truncstorevi16,
+ WriteVPMOV256, truncstorevi16,
masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
- WriteShuffle256, truncstore_s_vi16,
+ WriteVPMOV256, truncstore_s_vi16,
masked_truncstore_s_vi16, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
- select_truncus, WriteShuffle256,
+ select_truncus, WriteVPMOV256,
truncstore_us_vi16, masked_truncstore_us_vi16,
X86vtruncus, X86vmtruncus>;
defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
- WriteShuffle256, truncstorevi32,
+ WriteVPMOV256, truncstorevi32,
masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
- WriteShuffle256, truncstore_s_vi32,
+ WriteVPMOV256, truncstore_s_vi32,
masked_truncstore_s_vi32, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
- select_truncus, WriteShuffle256,
+ select_truncus, WriteVPMOV256,
truncstore_us_vi32, masked_truncstore_us_vi32,
X86vtruncus, X86vmtruncus>;
defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
- WriteShuffle256, truncstorevi8,
+ WriteVPMOV256, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
- WriteShuffle256, truncstore_s_vi8,
+ WriteVPMOV256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
- select_truncus, WriteShuffle256,
+ select_truncus, WriteVPMOV256,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
- WriteShuffle256, truncstorevi16,
+ WriteVPMOV256, truncstorevi16,
masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
- WriteShuffle256, truncstore_s_vi16,
+ WriteVPMOV256, truncstore_s_vi16,
masked_truncstore_s_vi16, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
- select_truncus, WriteShuffle256,
+ select_truncus, WriteVPMOV256,
truncstore_us_vi16, masked_truncstore_us_vi16,
X86vtruncus, X86vmtruncus>;
defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
- WriteShuffle256, truncstorevi8,
+ WriteVPMOV256, truncstorevi8,
masked_truncstorevi8, X86vtrunc,
X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
- WriteShuffle256, truncstore_s_vi8,
+ WriteVPMOV256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
- select_truncus, WriteShuffle256,
+ select_truncus, WriteVPMOV256,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
@@ -10084,7 +10084,7 @@ defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
-multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
+multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
let ExeDomain = DestInfo.ExeDomain in {
@@ -10100,135 +10100,140 @@ multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSche
}
}
-multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
+multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
- defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v8i16x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasBWI] in {
- defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
}
-multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
+multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
}
-multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
+multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
}
}
-multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
+multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+ X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
}
-multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
+multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+ X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
v8i16x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
- defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
- defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
}
-multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
+multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
+ X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
+ PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
let Predicates = [HasVLX, HasAVX512] in {
- defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
+ defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
v4i32x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
- defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
+ defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
v4i32x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
- defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
+ defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
v8i32x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
}
-defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
-defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
+defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
// Patterns that we also need any extend versions of. aext_vector_inreg
@@ -10523,21 +10528,22 @@ defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
+multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
- EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
+ EVEX, Sched<[Sched]>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
+// TODO - Replace WriteMove with WriteVecTrunc?
let Predicates = [prd] in
- defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+ defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteMove>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
- defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteMove>, EVEX_V256;
+ defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteMove>, EVEX_V128;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index ba52283b570d..7288ce812138 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -260,10 +260,10 @@ let isPseudo = 1, SchedRW = [WriteSystem] in {
// Pseudo instructions used by address sanitizer.
//===----------------------------------------------------------------------===//
let
- Defs = [R8, EFLAGS] in {
+ Defs = [R10, R11, EFLAGS] in {
def ASAN_CHECK_MEMACCESS : PseudoI<
- (outs), (ins GR64NoR8:$addr, i32imm:$accessinfo),
- [(int_asan_check_memaccess GR64NoR8:$addr, (i32 timm:$accessinfo))]>,
+ (outs), (ins GR64PLTSafe:$addr, i32imm:$accessinfo),
+ [(int_asan_check_memaccess GR64PLTSafe:$addr, (i32 timm:$accessinfo))]>,
Sched<[]>;
}
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 6d4ad08842c7..226349485238 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -529,11 +529,11 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::LZCNT16rr, X86::LZCNT16rm, 0 },
{ X86::LZCNT32rr, X86::LZCNT32rm, 0 },
{ X86::LZCNT64rr, X86::LZCNT64rm, 0 },
- { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, TB_ALIGN_16 },
- { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
- { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, TB_NO_REVERSE },
- { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, TB_ALIGN_16 },
- { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, TB_NO_REVERSE },
+ { X86::MMX_CVTPD2PIrr, X86::MMX_CVTPD2PIrm, TB_ALIGN_16 },
+ { X86::MMX_CVTPI2PDrr, X86::MMX_CVTPI2PDrm, 0 },
+ { X86::MMX_CVTPS2PIrr, X86::MMX_CVTPS2PIrm, TB_NO_REVERSE },
+ { X86::MMX_CVTTPD2PIrr, X86::MMX_CVTTPD2PIrm, TB_ALIGN_16 },
+ { X86::MMX_CVTTPS2PIrr, X86::MMX_CVTTPS2PIrm, TB_NO_REVERSE },
{ X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
{ X86::MMX_PABSBrr, X86::MMX_PABSBrm, 0 },
{ X86::MMX_PABSDrr, X86::MMX_PABSDrm, 0 },
@@ -1339,29 +1339,29 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE },
{ X86::MINSSrr, X86::MINSSrm, 0 },
{ X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE },
- { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
- { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
- { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
- { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
- { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
- { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
- { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
- { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
- { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
- { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
- { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
- { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
+ { X86::MMX_CVTPI2PSrr, X86::MMX_CVTPI2PSrm, 0 },
+ { X86::MMX_PACKSSDWrr, X86::MMX_PACKSSDWrm, 0 },
+ { X86::MMX_PACKSSWBrr, X86::MMX_PACKSSWBrm, 0 },
+ { X86::MMX_PACKUSWBrr, X86::MMX_PACKUSWBrm, 0 },
+ { X86::MMX_PADDBrr, X86::MMX_PADDBrm, 0 },
+ { X86::MMX_PADDDrr, X86::MMX_PADDDrm, 0 },
+ { X86::MMX_PADDQrr, X86::MMX_PADDQrm, 0 },
+ { X86::MMX_PADDSBrr, X86::MMX_PADDSBrm, 0 },
+ { X86::MMX_PADDSWrr, X86::MMX_PADDSWrm, 0 },
+ { X86::MMX_PADDUSBrr, X86::MMX_PADDUSBrm, 0 },
+ { X86::MMX_PADDUSWrr, X86::MMX_PADDUSWrm, 0 },
+ { X86::MMX_PADDWrr, X86::MMX_PADDWrm, 0 },
{ X86::MMX_PALIGNRrri, X86::MMX_PALIGNRrmi, 0 },
- { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
- { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
- { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
- { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
- { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
- { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
- { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
- { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
- { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
- { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
+ { X86::MMX_PANDNrr, X86::MMX_PANDNrm, 0 },
+ { X86::MMX_PANDrr, X86::MMX_PANDrm, 0 },
+ { X86::MMX_PAVGBrr, X86::MMX_PAVGBrm, 0 },
+ { X86::MMX_PAVGWrr, X86::MMX_PAVGWrm, 0 },
+ { X86::MMX_PCMPEQBrr, X86::MMX_PCMPEQBrm, 0 },
+ { X86::MMX_PCMPEQDrr, X86::MMX_PCMPEQDrm, 0 },
+ { X86::MMX_PCMPEQWrr, X86::MMX_PCMPEQWrm, 0 },
+ { X86::MMX_PCMPGTBrr, X86::MMX_PCMPGTBrm, 0 },
+ { X86::MMX_PCMPGTDrr, X86::MMX_PCMPGTDrm, 0 },
+ { X86::MMX_PCMPGTWrr, X86::MMX_PCMPGTWrm, 0 },
{ X86::MMX_PHADDDrr, X86::MMX_PHADDDrm, 0 },
{ X86::MMX_PHADDSWrr, X86::MMX_PHADDSWrm, 0 },
{ X86::MMX_PHADDWrr, X86::MMX_PHADDWrm, 0 },
@@ -1370,18 +1370,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::MMX_PHSUBWrr, X86::MMX_PHSUBWrm, 0 },
{ X86::MMX_PINSRWrr, X86::MMX_PINSRWrm, TB_NO_REVERSE },
{ X86::MMX_PMADDUBSWrr, X86::MMX_PMADDUBSWrm, 0 },
- { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
- { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
- { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
- { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
- { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
+ { X86::MMX_PMADDWDrr, X86::MMX_PMADDWDrm, 0 },
+ { X86::MMX_PMAXSWrr, X86::MMX_PMAXSWrm, 0 },
+ { X86::MMX_PMAXUBrr, X86::MMX_PMAXUBrm, 0 },
+ { X86::MMX_PMINSWrr, X86::MMX_PMINSWrm, 0 },
+ { X86::MMX_PMINUBrr, X86::MMX_PMINUBrm, 0 },
{ X86::MMX_PMULHRSWrr, X86::MMX_PMULHRSWrm, 0 },
- { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
- { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
- { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
- { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
- { X86::MMX_PORirr, X86::MMX_PORirm, 0 },
- { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
+ { X86::MMX_PMULHUWrr, X86::MMX_PMULHUWrm, 0 },
+ { X86::MMX_PMULHWrr, X86::MMX_PMULHWrm, 0 },
+ { X86::MMX_PMULLWrr, X86::MMX_PMULLWrm, 0 },
+ { X86::MMX_PMULUDQrr, X86::MMX_PMULUDQrm, 0 },
+ { X86::MMX_PORrr, X86::MMX_PORrm, 0 },
+ { X86::MMX_PSADBWrr, X86::MMX_PSADBWrm, 0 },
{ X86::MMX_PSHUFBrr, X86::MMX_PSHUFBrm, 0 },
{ X86::MMX_PSIGNBrr, X86::MMX_PSIGNBrm, 0 },
{ X86::MMX_PSIGNDrr, X86::MMX_PSIGNDrm, 0 },
@@ -1394,21 +1394,21 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
{ X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
{ X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
- { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
- { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
- { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
- { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
- { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
- { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
- { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
- { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
- { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
- { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
- { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
- { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, TB_NO_REVERSE },
- { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, TB_NO_REVERSE },
- { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, TB_NO_REVERSE },
- { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
+ { X86::MMX_PSUBBrr, X86::MMX_PSUBBrm, 0 },
+ { X86::MMX_PSUBDrr, X86::MMX_PSUBDrm, 0 },
+ { X86::MMX_PSUBQrr, X86::MMX_PSUBQrm, 0 },
+ { X86::MMX_PSUBSBrr, X86::MMX_PSUBSBrm, 0 },
+ { X86::MMX_PSUBSWrr, X86::MMX_PSUBSWrm, 0 },
+ { X86::MMX_PSUBUSBrr, X86::MMX_PSUBUSBrm, 0 },
+ { X86::MMX_PSUBUSWrr, X86::MMX_PSUBUSWrm, 0 },
+ { X86::MMX_PSUBWrr, X86::MMX_PSUBWrm, 0 },
+ { X86::MMX_PUNPCKHBWrr, X86::MMX_PUNPCKHBWrm, 0 },
+ { X86::MMX_PUNPCKHDQrr, X86::MMX_PUNPCKHDQrm, 0 },
+ { X86::MMX_PUNPCKHWDrr, X86::MMX_PUNPCKHWDrm, 0 },
+ { X86::MMX_PUNPCKLBWrr, X86::MMX_PUNPCKLBWrm, TB_NO_REVERSE },
+ { X86::MMX_PUNPCKLDQrr, X86::MMX_PUNPCKLDQrm, TB_NO_REVERSE },
+ { X86::MMX_PUNPCKLWDrr, X86::MMX_PUNPCKLWDrm, TB_NO_REVERSE },
+ { X86::MMX_PXORrr, X86::MMX_PXORrm, 0 },
{ X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
{ X86::MOVSDrr, X86::MOVLPDrm, TB_NO_REVERSE },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index bb5637a31947..c379aa8d9258 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4088,8 +4088,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
Register SrcReg, Register SrcReg2,
int64_t ImmMask, int64_t ImmValue,
- const MachineInstr &OI, bool *IsSwapped,
- int64_t *ImmDelta) const {
+ const MachineInstr &OI,
+ bool *IsSwapped) const {
switch (OI.getOpcode()) {
case X86::CMP64rr:
case X86::CMP32rr:
@@ -4140,21 +4140,10 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
int64_t OIMask;
int64_t OIValue;
if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
- SrcReg == OISrcReg && ImmMask == OIMask) {
- if (OIValue == ImmValue) {
- *ImmDelta = 0;
- return true;
- } else if (static_cast<uint64_t>(ImmValue) ==
- static_cast<uint64_t>(OIValue) - 1) {
- *ImmDelta = -1;
- return true;
- } else if (static_cast<uint64_t>(ImmValue) ==
- static_cast<uint64_t>(OIValue) + 1) {
- *ImmDelta = 1;
- return true;
- } else {
- return false;
- }
+ SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) {
+ assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister &&
+ "should not have 2nd register");
+ return true;
}
}
return FlagI.isIdenticalTo(OI);
@@ -4404,7 +4393,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
bool ShouldUpdateCC = false;
bool IsSwapped = false;
X86::CondCode NewCC = X86::COND_INVALID;
- int64_t ImmDelta = 0;
// Search backward from CmpInstr for the next instruction defining EFLAGS.
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -4451,7 +4439,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// ... // EFLAGS not changed
// cmp x, y // <-- can be removed
if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
- Inst, &IsSwapped, &ImmDelta)) {
+ Inst, &IsSwapped)) {
Sub = &Inst;
break;
}
@@ -4485,7 +4473,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
// If we are done with the basic block, we need to check whether EFLAGS is
// live-out.
- bool FlagsMayLiveOut = true;
+ bool IsSafe = false;
SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
MachineBasicBlock::iterator AfterCmpInstr =
std::next(MachineBasicBlock::iterator(CmpInstr));
@@ -4495,7 +4483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// We should check the usage if this instruction uses and updates EFLAGS.
if (!UseEFLAGS && ModifyEFLAGS) {
// It is safe to remove CmpInstr if EFLAGS is updated again.
- FlagsMayLiveOut = false;
+ IsSafe = true;
break;
}
if (!UseEFLAGS && !ModifyEFLAGS)
@@ -4503,7 +4491,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// EFLAGS is used by this instruction.
X86::CondCode OldCC = X86::COND_INVALID;
- if (MI || IsSwapped || ImmDelta != 0) {
+ if (MI || IsSwapped) {
// We decode the condition code from opcode.
if (Instr.isBranch())
OldCC = X86::getCondFromBranch(Instr);
@@ -4555,60 +4543,11 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
// We swap the condition code and synthesize the new opcode.
ReplacementCC = getSwappedCondition(OldCC);
- if (ReplacementCC == X86::COND_INVALID) return false;
- ShouldUpdateCC = true;
- } else if (ImmDelta != 0) {
- unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
- // Shift amount for min/max constants to adjust for 8/16/32 instruction
- // sizes.
- switch (OldCC) {
- case X86::COND_L: // x <s (C + 1) --> x <=s C
- if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
- return false;
- ReplacementCC = X86::COND_LE;
- break;
- case X86::COND_B: // x <u (C + 1) --> x <=u C
- if (ImmDelta != 1 || CmpValue == 0)
- return false;
- ReplacementCC = X86::COND_BE;
- break;
- case X86::COND_GE: // x >=s (C + 1) --> x >s C
- if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
- return false;
- ReplacementCC = X86::COND_G;
- break;
- case X86::COND_AE: // x >=u (C + 1) --> x >u C
- if (ImmDelta != 1 || CmpValue == 0)
- return false;
- ReplacementCC = X86::COND_A;
- break;
- case X86::COND_G: // x >s (C - 1) --> x >=s C
- if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
- return false;
- ReplacementCC = X86::COND_GE;
- break;
- case X86::COND_A: // x >u (C - 1) --> x >=u C
- if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
- return false;
- ReplacementCC = X86::COND_AE;
- break;
- case X86::COND_LE: // x <=s (C - 1) --> x <s C
- if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
- return false;
- ReplacementCC = X86::COND_L;
- break;
- case X86::COND_BE: // x <=u (C - 1) --> x <u C
- if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
- return false;
- ReplacementCC = X86::COND_B;
- break;
- default:
+ if (ReplacementCC == X86::COND_INVALID)
return false;
- }
- ShouldUpdateCC = true;
}
- if (ShouldUpdateCC && ReplacementCC != OldCC) {
+ if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
// Push the MachineInstr to OpsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// instructions will be modified.
@@ -4616,14 +4555,14 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
}
if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
// It is safe to remove CmpInstr if EFLAGS is updated again or killed.
- FlagsMayLiveOut = false;
+ IsSafe = true;
break;
}
}
- // If we have to update users but EFLAGS is live-out abort, since we cannot
- // easily find all of the users.
- if (ShouldUpdateCC && FlagsMayLiveOut) {
+ // If EFLAGS is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if ((MI || IsSwapped) && !IsSafe) {
for (MachineBasicBlock *Successor : CmpMBB.successors())
if (Successor->isLiveIn(X86::EFLAGS))
return false;
@@ -4944,7 +4883,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::SETB_C64r:
return Expand2AddrUndef(MIB, get(X86::SBB64rr));
case X86::MMX_SET0:
- return Expand2AddrUndef(MIB, get(X86::MMX_PXORirr));
+ return Expand2AddrUndef(MIB, get(X86::MMX_PXORrr));
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
@@ -5217,12 +5156,12 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
bool ForLoadFold = false) {
// Set the OpNum parameter to the first source operand.
switch (Opcode) {
- case X86::MMX_PUNPCKHBWirr:
- case X86::MMX_PUNPCKHWDirr:
- case X86::MMX_PUNPCKHDQirr:
- case X86::MMX_PUNPCKLBWirr:
- case X86::MMX_PUNPCKLWDirr:
- case X86::MMX_PUNPCKLDQirr:
+ case X86::MMX_PUNPCKHBWrr:
+ case X86::MMX_PUNPCKHWDrr:
+ case X86::MMX_PUNPCKHDQrr:
+ case X86::MMX_PUNPCKLBWrr:
+ case X86::MMX_PUNPCKLWDrr:
+ case X86::MMX_PUNPCKLDQrr:
case X86::MOVHLPSrr:
case X86::PACKSSWBrr:
case X86::PACKUSWBrr:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 33ce55bbdb2b..537ada6222bf 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -643,8 +643,7 @@ private:
/// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true
bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
- const MachineInstr &OI, bool *IsSwapped,
- int64_t *ImmDelta) const;
+ const MachineInstr &OI, bool *IsSwapped) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index bb3e6df3bf3e..aeecc25ddea2 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -34,14 +34,14 @@ let Constraints = "$src1 = $dst" in {
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
X86FoldableSchedWrite sched, bit Commutable = 0,
X86MemOperand OType = i64mem> {
- def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]> {
let isCommutable = Commutable;
}
- def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, (load_mmx addr:$src2)))]>,
@@ -123,25 +123,25 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, X86FoldableSchedWrite sched, Domain d> {
- def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], d>,
- Sched<[sched]>;
- def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>,
- Sched<[sched.Folded]>;
+ def rr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>,
+ Sched<[sched]>;
+ def rm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>,
+ Sched<[sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, Domain d> {
- def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
- (ins DstRC:$src1, SrcRC:$src2), asm,
- [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>,
- Sched<[WriteCvtI2PS]>;
- def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src2), asm,
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>,
- Sched<[WriteCvtI2PS.Folded]>;
+ def rr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
+ (ins DstRC:$src1, SrcRC:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>,
+ Sched<[WriteCvtI2PS]>;
+ def rm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>,
+ Sched<[WriteCvtI2PS.Folded]>;
}
//===----------------------------------------------------------------------===//
@@ -569,14 +569,14 @@ def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
- (MMX_CVTPS2PIirr VR128:$src)>;
+ (MMX_CVTPS2PIrr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
- (MMX_CVTTPS2PIirr VR128:$src)>;
+ (MMX_CVTTPS2PIrr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
- (MMX_CVTPD2PIirr VR128:$src)>;
+ (MMX_CVTPD2PIrr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
- (MMX_CVTTPD2PIirr VR128:$src)>;
+ (MMX_CVTTPD2PIrr VR128:$src)>;
}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index c3cd634612a4..9044f10ec630 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -48,6 +48,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
+#include <string>
using namespace llvm;
@@ -1336,235 +1337,29 @@ void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
return;
}
- unsigned Reg = MI.getOperand(0).getReg().id();
+ const auto &Reg = MI.getOperand(0).getReg();
ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
- MCSymbol *&Sym =
- AsanMemaccessSymbols[AsanMemaccessTuple(Reg, AccessInfo.Packed)];
- if (!Sym) {
- std::string Name = AccessInfo.IsWrite ? "store" : "load";
- std::string SymName = "__asan_check_" + Name +
- utostr(1ULL << AccessInfo.AccessSizeIndex) + "_rn" +
- utostr(Reg);
- Sym = OutContext.getOrCreateSymbol(SymName);
- }
-
- EmitAndCountInstruction(
- MCInstBuilder(X86::CALL64pcrel32)
- .addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
-}
-
-void X86AsmPrinter::emitAsanMemaccessPartial(Module &M, unsigned Reg,
- const ASanAccessInfo &AccessInfo,
- MCSubtargetInfo &STI) {
- assert(AccessInfo.AccessSizeIndex == 0 || AccessInfo.AccessSizeIndex == 1 ||
- AccessInfo.AccessSizeIndex == 2);
- assert(Reg != X86::R8);
-
uint64_t ShadowBase;
int MappingScale;
bool OrShadowOffset;
- getAddressSanitizerParams(
- Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
- AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
-
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
- STI);
- OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
- .addReg(X86::R8)
- .addReg(X86::R8)
- .addImm(MappingScale),
- STI);
- if (OrShadowOffset) {
- OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
- .addReg(X86::R8)
- .addReg(X86::R8)
- .addImm(ShadowBase),
- STI);
- OutStreamer->emitInstruction(MCInstBuilder(X86::MOV8rm)
- .addReg(X86::R8B)
- .addReg(X86::R8)
- .addImm(1)
- .addReg(X86::NoRegister)
- .addImm(0)
- .addReg(X86::NoRegister),
- STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::TEST8rr).addReg(X86::R8B).addReg(X86::R8B), STI);
- } else {
- OutStreamer->emitInstruction(MCInstBuilder(X86::MOVSX32rm8)
- .addReg(X86::R8D)
- .addReg(X86::R8)
- .addImm(1)
- .addReg(X86::NoRegister)
- .addImm(ShadowBase)
- .addReg(X86::NoRegister),
- STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::TEST32rr).addReg(X86::R8D).addReg(X86::R8D), STI);
- }
- MCSymbol *AdditionalCheck = OutContext.createTempSymbol();
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::JCC_1)
- .addExpr(MCSymbolRefExpr::create(AdditionalCheck, OutContext))
- .addImm(X86::COND_NE),
- STI);
- MCSymbol *ReturnSym = OutContext.createTempSymbol();
- OutStreamer->emitLabel(ReturnSym);
- OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
-
- // Shadow byte is non-zero so we need to perform additional checks.
- OutStreamer->emitLabel(AdditionalCheck);
- OutStreamer->emitInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RCX),
- STI);
- OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
- .addReg(X86::RCX)
- .addReg(X86::NoRegister + Reg),
- STI);
- const size_t Granularity = 1ULL << MappingScale;
- OutStreamer->emitInstruction(MCInstBuilder(X86::AND32ri8)
- .addReg(X86::NoRegister)
- .addReg(X86::ECX)
- .addImm(Granularity - 1),
- STI);
- if (AccessInfo.AccessSizeIndex == 1) {
- OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
- .addReg(X86::NoRegister)
- .addReg(X86::ECX)
- .addImm(1),
- STI);
- } else if (AccessInfo.AccessSizeIndex == 2) {
- OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
- .addReg(X86::NoRegister)
- .addReg(X86::ECX)
- .addImm(3),
- STI);
- }
-
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::R8D).addImm(1),
- STI);
- OutStreamer->emitInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RCX),
- STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::JCC_1)
- .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext))
- .addImm(X86::COND_L),
- STI);
-
- emitAsanReportError(M, Reg, AccessInfo, STI);
-}
-
-void X86AsmPrinter::emitAsanMemaccessFull(Module &M, unsigned Reg,
- const ASanAccessInfo &AccessInfo,
- MCSubtargetInfo &STI) {
- assert(AccessInfo.AccessSizeIndex == 3 || AccessInfo.AccessSizeIndex == 4);
- assert(Reg != X86::R8);
-
- uint64_t ShadowBase;
- int MappingScale;
- bool OrShadowOffset;
- getAddressSanitizerParams(
- Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
- AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
-
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
- STI);
- OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
- .addReg(X86::R8)
- .addReg(X86::R8)
- .addImm(MappingScale),
- STI);
- if (OrShadowOffset) {
- OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
- .addReg(X86::R8)
- .addReg(X86::R8)
- .addImm(ShadowBase),
- STI);
- auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
- OutStreamer->emitInstruction(MCInstBuilder(OpCode)
- .addReg(X86::R8)
- .addImm(1)
- .addReg(X86::NoRegister)
- .addImm(0)
- .addReg(X86::NoRegister)
- .addImm(0),
- STI);
- } else {
- auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
- OutStreamer->emitInstruction(MCInstBuilder(OpCode)
- .addReg(X86::R8)
- .addImm(1)
- .addReg(X86::NoRegister)
- .addImm(ShadowBase)
- .addReg(X86::NoRegister)
- .addImm(0),
- STI);
- }
- MCSymbol *ReportCode = OutContext.createTempSymbol();
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::JCC_1)
- .addExpr(MCSymbolRefExpr::create(ReportCode, OutContext))
- .addImm(X86::COND_NE),
- STI);
- MCSymbol *ReturnSym = OutContext.createTempSymbol();
- OutStreamer->emitLabel(ReturnSym);
- OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
-
- OutStreamer->emitLabel(ReportCode);
- emitAsanReportError(M, Reg, AccessInfo, STI);
-}
+ getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
+ AccessInfo.CompileKernel, &ShadowBase,
+ &MappingScale, &OrShadowOffset);
-void X86AsmPrinter::emitAsanReportError(Module &M, unsigned Reg,
- const ASanAccessInfo &AccessInfo,
- MCSubtargetInfo &STI) {
std::string Name = AccessInfo.IsWrite ? "store" : "load";
- MCSymbol *ReportError = OutContext.getOrCreateSymbol(
- "__asan_report_" + Name + utostr(1ULL << AccessInfo.AccessSizeIndex));
- OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
- .addReg(X86::RDI)
- .addReg(X86::NoRegister + Reg),
- STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(X86::JMP_4)
- .addExpr(MCSymbolRefExpr::create(ReportError, MCSymbolRefExpr::VK_PLT,
- OutContext)),
- STI);
-}
-
-void X86AsmPrinter::emitAsanMemaccessSymbols(Module &M) {
- if (AsanMemaccessSymbols.empty())
- return;
-
- const Triple &TT = TM.getTargetTriple();
- assert(TT.isOSBinFormatELF());
- std::unique_ptr<MCSubtargetInfo> STI(
- TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
- assert(STI && "Unable to create subtarget info");
-
- for (auto &P : AsanMemaccessSymbols) {
- MCSymbol *Sym = P.second;
- OutStreamer->SwitchSection(OutContext.getELFSection(
- ".text.hot", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Sym->getName(),
- /*IsComdat=*/true));
-
- OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
- OutStreamer->emitSymbolAttribute(Sym, MCSA_Weak);
- OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden);
- OutStreamer->emitLabel(Sym);
+ std::string Op = OrShadowOffset ? "or" : "add";
+ std::string SymName = "__asan_check_" + Name + "_" + Op + "_" +
+ utostr(1ULL << AccessInfo.AccessSizeIndex) + "_" +
+ TM.getMCRegisterInfo()->getName(Reg.asMCReg());
+ if (OrShadowOffset)
+ report_fatal_error(
+ "OrShadowOffset is not supported with optimized callbacks");
- unsigned Reg = std::get<0>(P.first);
- ASanAccessInfo AccessInfo(std::get<1>(P.first));
-
- if (AccessInfo.AccessSizeIndex < 3) {
- emitAsanMemaccessPartial(M, Reg, AccessInfo, *STI);
- } else {
- emitAsanMemaccessFull(M, Reg, AccessInfo, *STI);
- }
- }
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::CALL64pcrel32)
+ .addExpr(MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol(SymName), OutContext)));
}
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
@@ -2615,6 +2410,15 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
const X86RegisterInfo *RI =
MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+ if (MI->getOpcode() == X86::OR64rm) {
+ for (auto &Opd : MI->operands()) {
+ if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
+ "swift_async_extendedFramePointerFlags") {
+ ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
+ }
+ }
+ }
+
// Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
// are compressed from EVEX encoding to VEX encoding.
if (TM.Options.MCOptions.ShowMCEncoding) {
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index d835f452b67e..1b704bcb8e08 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -430,11 +430,11 @@ def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
-// GR64 - 64-bit GPRs without R8 and RIP. Could be used when emitting code for
-// intrinsics, which use implict input registers.
-def GR64NoR8 : RegisterClass<"X86", [i64], 64,
- (add RAX, RCX, RDX, RSI, RDI, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP, RSP)>;
+// GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when
+// emitting code for intrinsics, which use implicit input registers.
+def GR64PLTSafe : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9,
+ RBX, R14, R15, R12, R13, RBP)>;
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 2827981b7fb0..a6ff472aac6f 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -783,7 +783,7 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
@@ -800,9 +800,9 @@ def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup33], (instrs MMX_PACKSSDWirr,
- MMX_PACKSSWBirr,
- MMX_PACKUSWBirr)>;
+def: InstRW<[BWWriteResGroup33], (instrs MMX_PACKSSDWrr,
+ MMX_PACKSSWBrr,
+ MMX_PACKUSWBrr)>;
def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 3;
@@ -862,9 +862,9 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDirr)>;
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIirr",
- "MMX_CVT(T?)PS2PIirr",
+def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDrr)>;
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr",
+ "MMX_CVT(T?)PS2PIrr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
"(V?)CVTSD2SSrr",
@@ -1086,9 +1086,9 @@ def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup79], (instrs MMX_PACKSSDWirm,
- MMX_PACKSSWBirm,
- MMX_PACKUSWBirm)>;
+def: InstRW<[BWWriteResGroup79], (instrs MMX_PACKSSDWrm,
+ MMX_PACKSSWBrm,
+ MMX_PACKUSWBrm)>;
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 7;
@@ -1155,7 +1155,7 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSirm,
+def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSrm,
CVTDQ2PSrm,
VCVTDQ2PSrm)>;
def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
@@ -1236,8 +1236,8 @@ def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm,
CVTPD2DQrm,
CVTTPD2DQrm,
- MMX_CVTPI2PDirm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIirm",
+ MMX_CVTPI2PDrm)>;
+def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm",
"(V?)CVTDQ2PDrm",
"(V?)CVTSD2SSrm")>;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 68961d6245ab..371a9571ae39 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -995,7 +995,7 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSirm)>;
+def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>;
def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
@@ -1164,9 +1164,9 @@ def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup36_2], (instrs MMX_PACKSSDWirm,
- MMX_PACKSSWBirm,
- MMX_PACKUSWBirm)>;
+def: InstRW<[HWWriteResGroup36_2], (instrs MMX_PACKSSDWrm,
+ MMX_PACKSSWBrm,
+ MMX_PACKUSWBrm)>;
def HWWriteResGroup37 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 7;
@@ -1240,7 +1240,7 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
@@ -1285,9 +1285,9 @@ def HWWriteResGroup57 : SchedWriteRes<[HWPort5,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup57], (instrs MMX_PACKSSDWirr,
- MMX_PACKSSWBirr,
- MMX_PACKUSWBirr)>;
+def: InstRW<[HWWriteResGroup57], (instrs MMX_PACKSSDWrr,
+ MMX_PACKSSWBrr,
+ MMX_PACKUSWBrr)>;
def HWWriteResGroup58 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 3;
@@ -1373,11 +1373,11 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPI2PDirr,
- MMX_CVTPD2PIirr,
- MMX_CVTPS2PIirr,
- MMX_CVTTPD2PIirr,
- MMX_CVTTPS2PIirr)>;
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPI2PDrr,
+ MMX_CVTPD2PIrr,
+ MMX_CVTPS2PIrr,
+ MMX_CVTTPD2PIrr,
+ MMX_CVTTPS2PIrr)>;
def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
"(V?)CVTSD2SSrr",
@@ -1418,8 +1418,8 @@ def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm,
CVTPD2DQrm,
CVTTPD2DQrm,
- MMX_CVTPD2PIirm,
- MMX_CVTTPD2PIirm,
+ MMX_CVTPD2PIrm,
+ MMX_CVTTPD2PIrm,
CVTDQ2PDrm,
VCVTDQ2PDrm)>;
@@ -1428,7 +1428,7 @@ def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm,
+def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm,
CVTSD2SSrm, CVTSD2SSrm_Int,
VCVTSD2SSrm, VCVTSD2SSrm_Int)>;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 889b9b7fa666..789de9eb5751 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -331,12 +331,12 @@ defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteFTest, [ICXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>;
defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>;
-defm : ICXWriteResPair<WriteFShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
-defm : ICXWriteResPair<WriteFShuffleY, [ICXPort5], 1, [1], 1, 7>;
-defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
-defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
-defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort5], 1, [1], 1, 7>;
-defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector shuffles.
+defm : ICXWriteResPair<WriteFShuffleY, [ICXPort15], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector variable shuffles.
+defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteFBlend, [ICXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : ICXWriteResPair<WriteFBlendY,[ICXPort015], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteFBlendZ,[ICXPort015], 1, [1], 1, 7>;
@@ -388,14 +388,14 @@ defm : ICXWriteResPair<WriteVecIMulZ, [ICXPort05], 5, [1], 1, 7>;
defm : ICXWriteResPair<WritePMULLD, [ICXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : ICXWriteResPair<WritePMULLDY, [ICXPort01], 10, [2], 2, 7>;
defm : ICXWriteResPair<WritePMULLDZ, [ICXPort05], 10, [2], 2, 7>;
-defm : ICXWriteResPair<WriteShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector shuffles.
-defm : ICXWriteResPair<WriteShuffleX, [ICXPort5], 1, [1], 1, 6>;
-defm : ICXWriteResPair<WriteShuffleY, [ICXPort5], 1, [1], 1, 7>;
-defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5], 1, [1], 1, 7>;
-defm : ICXWriteResPair<WriteVarShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
-defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort5], 1, [1], 1, 6>;
-defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort5], 1, [1], 1, 7>;
-defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : ICXWriteResPair<WriteShuffleX, [ICXPort15], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteShuffleY, [ICXPort15], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
+defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort15], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteBlend, [ICXPort5], 1, [1], 1, 6>; // Vector blends.
defm : ICXWriteResPair<WriteBlendY,[ICXPort5], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteBlendZ,[ICXPort5], 1, [1], 1, 7>;
@@ -642,15 +642,15 @@ def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"KXOR(B|D|Q|W)rr",
"KSET0(B|D|Q|W)", // Same as KXOR
"KSET1(B|D|Q|W)", // Same as KXNOR
- "MMX_PADDS(B|W)irr",
- "MMX_PADDUS(B|W)irr",
- "MMX_PAVG(B|W)irr",
- "MMX_PCMPEQ(B|D|W)irr",
- "MMX_PCMPGT(B|D|W)irr",
- "MMX_P(MAX|MIN)SWirr",
- "MMX_P(MAX|MIN)UBirr",
- "MMX_PSUBS(B|W)irr",
- "MMX_PSUBUS(B|W)irr",
+ "MMX_PADDS(B|W)rr",
+ "MMX_PADDUS(B|W)rr",
+ "MMX_PAVG(B|W)rr",
+ "MMX_PCMPEQ(B|D|W)rr",
+ "MMX_PCMPGT(B|D|W)rr",
+ "MMX_P(MAX|MIN)SWrr",
+ "MMX_P(MAX|MIN)UBrr",
+ "MMX_PSUBS(B|W)rr",
+ "MMX_PSUBUS(B|W)rr",
"VPMOVB2M(Z|Z128|Z256)rr",
"VPMOVD2M(Z|Z128|Z256)rr",
"VPMOVQ2M(Z|Z128|Z256)rr",
@@ -663,7 +663,16 @@ def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
}
def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
"KMOV(B|D|Q|W)kr",
- "UCOM_F(P?)r")>;
+ "UCOM_F(P?)r",
+ "VPBROADCAST(D|Q)rr",
+ "(V?)INSERTPS(Z?)rr",
+ "(V?)MOV(HL|LH)PS(Z?)rr",
+ "(V?)MOVDDUP(Y|Z|Z128|Z256)?rr",
+ "(V?)PALIGNR(Y|Z|Z128|Z256)?rri",
+ "(V?)PERMIL(PD|PS)(Y|Z|Z128|Z256)?ri",
+ "(V?)PERMIL(PD|PS)(Y|Z|Z128|Z256)?rr",
+ "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
+ "(V?)UNPCK(L|H)(PD|PS)(Y|Z|Z128|Z256)?rr")>;
def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
let Latency = 1;
@@ -702,6 +711,7 @@ def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
"VBLENDMPS(Z128|Z256)rr",
"VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
"(V?)PADD(B|D|Q|W)rr",
+ "(V?)MOV(SD|SS)(Z?)rr",
"VPBLENDD(Y?)rri",
"VPBLENDMB(Z128|Z256)rr",
"VPBLENDMD(Z128|Z256)rr",
@@ -892,9 +902,9 @@ def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWirr,
- MMX_PACKSSWBirr,
- MMX_PACKUSWBirr)>;
+def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWrr,
+ MMX_PACKSSWBrr,
+ MMX_PACKUSWBrr)>;
def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
let Latency = 3;
@@ -1055,8 +1065,8 @@ def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIirr",
- "MMX_CVT(T?)PS2PIirr",
+def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
+ "MMX_CVT(T?)PS2PIrr",
"VCVTDQ2PDZ128rr",
"VCVTPD2DQZ128rr",
"(V?)CVT(T?)PD2DQrr",
@@ -1162,7 +1172,7 @@ def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
"VCOMPRESSPS(Z|Z128|Z256)rr",
"VPCOMPRESSD(Z|Z128|Z256)rr",
@@ -1174,26 +1184,26 @@ def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0,ICXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBirm,
- MMX_PADDSWirm,
- MMX_PADDUSBirm,
- MMX_PADDUSWirm,
- MMX_PAVGBirm,
- MMX_PAVGWirm,
- MMX_PCMPEQBirm,
- MMX_PCMPEQDirm,
- MMX_PCMPEQWirm,
- MMX_PCMPGTBirm,
- MMX_PCMPGTDirm,
- MMX_PCMPGTWirm,
- MMX_PMAXSWirm,
- MMX_PMAXUBirm,
- MMX_PMINSWirm,
- MMX_PMINUBirm,
- MMX_PSUBSBirm,
- MMX_PSUBSWirm,
- MMX_PSUBUSBirm,
- MMX_PSUBUSWirm)>;
+def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBrm,
+ MMX_PADDSWrm,
+ MMX_PADDUSBrm,
+ MMX_PADDUSWrm,
+ MMX_PAVGBrm,
+ MMX_PAVGWrm,
+ MMX_PCMPEQBrm,
+ MMX_PCMPEQDrm,
+ MMX_PCMPEQWrm,
+ MMX_PCMPGTBrm,
+ MMX_PCMPGTDrm,
+ MMX_PCMPGTWrm,
+ MMX_PMAXSWrm,
+ MMX_PMAXUBrm,
+ MMX_PMINSWrm,
+ MMX_PMINUBrm,
+ MMX_PSUBSBrm,
+ MMX_PSUBSWrm,
+ MMX_PSUBUSBrm,
+ MMX_PSUBUSWrm)>;
def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6,ICXPort23]> {
let Latency = 6;
@@ -1295,20 +1305,14 @@ def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[ICXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
- "VMOVSSZrm(b?)")>;
-
-def ICXWriteResGroup92a : SchedWriteRes<[ICXPort5,ICXPort23]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[ICXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
- "(V?)PMOV(SX|ZX)BQrm",
- "(V?)PMOV(SX|ZX)BWrm",
- "(V?)PMOV(SX|ZX)DQrm",
- "(V?)PMOV(SX|ZX)WDrm",
- "(V?)PMOV(SX|ZX)WQrm")>;
+def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)",
+ "VPBROADCAST(B|W)(Z128)?rm",
+ "(V?)INSERTPS(Z?)rm",
+ "(V?)PALIGNR(Z128)?rmi",
+ "(V?)PERMIL(PD|PS)(Z128)?m(b?)i",
+ "(V?)PERMIL(PD|PS)(Z128)?rm",
+ "(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
+ "(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>;
def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort015]> {
let Latency = 7;
@@ -1391,9 +1395,9 @@ def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWirm,
- MMX_PACKSSWBirm,
- MMX_PACKUSWBirm)>;
+def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWrm,
+ MMX_PACKSSWBrm,
+ MMX_PACKUSWBrm)>;
def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> {
let Latency = 7;
@@ -1546,7 +1550,12 @@ def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5,ICXPort23]> {
}
def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
"VPBROADCASTB(Z|Z256)rm(b?)",
- "VPBROADCASTW(Z|Z256)rm(b?)")>;
+ "VPBROADCASTW(Z|Z256)rm(b?)",
+ "(V?)PALIGNR(Y|Z|Z256)rmi",
+ "(V?)PERMIL(PD|PS)(Y|Z|Z256)m(b?)i",
+ "(V?)PERMIL(PD|PS)(Y|Z|Z256)rm",
+ "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
+ "(V?)UNPCK(L|H)(PD|PS)(Y|Z|Z256)rm")>;
def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
VPBROADCASTWYrm,
VPMOVSXBDYrm,
@@ -1683,7 +1692,7 @@ def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0,ICXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSirm)>;
+def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;
def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 9;
@@ -1709,19 +1718,7 @@ def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
"VPMAXSQZ128rm(b?)",
"VPMAXUQZ128rm(b?)",
"VPMINSQZ128rm(b?)",
- "VPMINUQZ128rm(b?)",
- "VPMOVSXBDZ128rm(b?)",
- "VPMOVSXBQZ128rm(b?)",
- "VPMOVSXBWZ128rm(b?)",
- "VPMOVSXDQZ128rm(b?)",
- "VPMOVSXWDZ128rm(b?)",
- "VPMOVSXWQZ128rm(b?)",
- "VPMOVZXBDZ128rm(b?)",
- "VPMOVZXBQZ128rm(b?)",
- "VPMOVZXBWZ128rm(b?)",
- "VPMOVZXDQZ128rm(b?)",
- "VPMOVZXWDZ128rm(b?)",
- "VPMOVZXWQZ128rm(b?)")>;
+ "VPMINUQZ128rm(b?)")>;
def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
let Latency = 10;
@@ -1753,7 +1750,7 @@ def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
+def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
@@ -1950,8 +1947,8 @@ def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2PSrm,
CVTPD2DQrm,
CVTTPD2DQrm,
- MMX_CVTPD2PIirm,
- MMX_CVTTPD2PIirm)>;
+ MMX_CVTPD2PIrm,
+ MMX_CVTTPD2PIrm)>;
def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
let Latency = 11;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index c8d7b0f72c1c..af5c0540deb5 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -623,7 +623,7 @@ def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
MMX_PABSDrr,
MMX_PABSWrr,
- MMX_PADDQirr,
+ MMX_PADDQrr,
MMX_PALIGNRrri,
MMX_PSIGNBrr,
MMX_PSIGNDrr,
@@ -870,7 +870,7 @@ def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQirm)>;
+def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQrm)>;
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 7d3229c3b023..b3c13c72dd01 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -624,15 +624,15 @@ def SKLWriteResGroup1 : SchedWriteRes<[SKLPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDS(B|W)irr",
- "MMX_PADDUS(B|W)irr",
- "MMX_PAVG(B|W)irr",
- "MMX_PCMPEQ(B|D|W)irr",
- "MMX_PCMPGT(B|D|W)irr",
- "MMX_P(MAX|MIN)SWirr",
- "MMX_P(MAX|MIN)UBirr",
- "MMX_PSUBS(B|W)irr",
- "MMX_PSUBUS(B|W)irr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDS(B|W)rr",
+ "MMX_PADDUS(B|W)rr",
+ "MMX_PAVG(B|W)rr",
+ "MMX_PCMPEQ(B|D|W)rr",
+ "MMX_PCMPGT(B|D|W)rr",
+ "MMX_P(MAX|MIN)SWrr",
+ "MMX_P(MAX|MIN)UBrr",
+ "MMX_PSUBS(B|W)rr",
+ "MMX_PSUBUS(B|W)rr")>;
def SKLWriteResGroup3 : SchedWriteRes<[SKLPort5]> {
let Latency = 1;
@@ -815,9 +815,9 @@ def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup39], (instrs MMX_PACKSSDWirr,
- MMX_PACKSSWBirr,
- MMX_PACKUSWBirr)>;
+def: InstRW<[SKLWriteResGroup39], (instrs MMX_PACKSSDWrr,
+ MMX_PACKSSWBrr,
+ MMX_PACKUSWBrr)>;
def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 3;
@@ -927,7 +927,7 @@ def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup59], (instrs MMX_CVTPI2PDirr,
+def: InstRW<[SKLWriteResGroup59], (instrs MMX_CVTPI2PDrr,
CVTDQ2PDrr,
VCVTDQ2PDrr)>;
@@ -936,8 +936,8 @@ def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PD2PIirr",
- "MMX_CVT(T?)PS2PIirr",
+def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PD2PIrr",
+ "MMX_CVT(T?)PS2PIrr",
"(V?)CVT(T?)PD2DQrr",
"(V?)CVTPD2PSrr",
"(V?)CVTPS2PDrr",
@@ -984,33 +984,33 @@ def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup68], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[SKLWriteResGroup68], (instrs MMX_CVTPI2PSrr)>;
def SKLWriteResGroup69 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup69], (instrs MMX_PADDSBirm,
- MMX_PADDSWirm,
- MMX_PADDUSBirm,
- MMX_PADDUSWirm,
- MMX_PAVGBirm,
- MMX_PAVGWirm,
- MMX_PCMPEQBirm,
- MMX_PCMPEQDirm,
- MMX_PCMPEQWirm,
- MMX_PCMPGTBirm,
- MMX_PCMPGTDirm,
- MMX_PCMPGTWirm,
- MMX_PMAXSWirm,
- MMX_PMAXUBirm,
- MMX_PMINSWirm,
- MMX_PMINUBirm,
- MMX_PSUBSBirm,
- MMX_PSUBSWirm,
- MMX_PSUBUSBirm,
- MMX_PSUBUSWirm)>;
+def: InstRW<[SKLWriteResGroup69], (instrs MMX_PADDSBrm,
+ MMX_PADDSWrm,
+ MMX_PADDUSBrm,
+ MMX_PADDUSWrm,
+ MMX_PAVGBrm,
+ MMX_PAVGWrm,
+ MMX_PCMPEQBrm,
+ MMX_PCMPEQDrm,
+ MMX_PCMPEQWrm,
+ MMX_PCMPGTBrm,
+ MMX_PCMPGTDrm,
+ MMX_PCMPGTWrm,
+ MMX_PMAXSWrm,
+ MMX_PMAXUBrm,
+ MMX_PMINSWrm,
+ MMX_PMINUBrm,
+ MMX_PSUBSBrm,
+ MMX_PSUBSWrm,
+ MMX_PSUBUSBrm,
+ MMX_PSUBUSWrm)>;
def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort01]> {
let Latency = 6;
@@ -1144,9 +1144,9 @@ def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup92], (instrs MMX_PACKSSDWirm,
- MMX_PACKSSWBirm,
- MMX_PACKUSWBirm)>;
+def: InstRW<[SKLWriteResGroup92], (instrs MMX_PACKSSDWrm,
+ MMX_PACKSSWBrm,
+ MMX_PACKUSWBrm)>;
def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
let Latency = 7;
@@ -1283,7 +1283,7 @@ def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSirm)>;
+def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSrm)>;
def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 9;
@@ -1302,7 +1302,7 @@ def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
+def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
@@ -1345,7 +1345,7 @@ def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDirm)>;
+def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDrm)>;
def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 10;
@@ -1425,8 +1425,8 @@ def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2PSrm,
CVTPD2DQrm,
CVTTPD2DQrm,
- MMX_CVTPD2PIirm,
- MMX_CVTTPD2PIirm)>;
+ MMX_CVTPD2PIrm,
+ MMX_CVTTPD2PIrm)>;
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 1d8417aef41e..74f9da158353 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -634,15 +634,15 @@ def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"KXOR(B|D|Q|W)rr",
"KSET0(B|D|Q|W)", // Same as KXOR
"KSET1(B|D|Q|W)", // Same as KXNOR
- "MMX_PADDS(B|W)irr",
- "MMX_PADDUS(B|W)irr",
- "MMX_PAVG(B|W)irr",
- "MMX_PCMPEQ(B|D|W)irr",
- "MMX_PCMPGT(B|D|W)irr",
- "MMX_P(MAX|MIN)SWirr",
- "MMX_P(MAX|MIN)UBirr",
- "MMX_PSUBS(B|W)irr",
- "MMX_PSUBUS(B|W)irr",
+ "MMX_PADDS(B|W)rr",
+ "MMX_PADDUS(B|W)rr",
+ "MMX_PAVG(B|W)rr",
+ "MMX_PCMPEQ(B|D|W)rr",
+ "MMX_PCMPGT(B|D|W)rr",
+ "MMX_P(MAX|MIN)SWrr",
+ "MMX_P(MAX|MIN)UBrr",
+ "MMX_PSUBS(B|W)rr",
+ "MMX_PSUBUS(B|W)rr",
"VPMOVB2M(Z|Z128|Z256)rr",
"VPMOVD2M(Z|Z128|Z256)rr",
"VPMOVQ2M(Z|Z128|Z256)rr",
@@ -884,9 +884,9 @@ def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWirr,
- MMX_PACKSSWBirr,
- MMX_PACKUSWBirr)>;
+def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWrr,
+ MMX_PACKSSWBrr,
+ MMX_PACKUSWBrr)>;
def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 3;
@@ -1047,8 +1047,8 @@ def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIirr",
- "MMX_CVT(T?)PS2PIirr",
+def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
+ "MMX_CVT(T?)PS2PIrr",
"VCVTDQ2PDZ128rr",
"VCVTPD2DQZ128rr",
"(V?)CVT(T?)PD2DQrr",
@@ -1154,7 +1154,7 @@ def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
"VCOMPRESSPS(Z|Z128|Z256)rr",
"VPCOMPRESSD(Z|Z128|Z256)rr",
@@ -1166,26 +1166,26 @@ def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBirm,
- MMX_PADDSWirm,
- MMX_PADDUSBirm,
- MMX_PADDUSWirm,
- MMX_PAVGBirm,
- MMX_PAVGWirm,
- MMX_PCMPEQBirm,
- MMX_PCMPEQDirm,
- MMX_PCMPEQWirm,
- MMX_PCMPGTBirm,
- MMX_PCMPGTDirm,
- MMX_PCMPGTWirm,
- MMX_PMAXSWirm,
- MMX_PMAXUBirm,
- MMX_PMINSWirm,
- MMX_PMINUBirm,
- MMX_PSUBSBirm,
- MMX_PSUBSWirm,
- MMX_PSUBUSBirm,
- MMX_PSUBUSWirm)>;
+def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBrm,
+ MMX_PADDSWrm,
+ MMX_PADDUSBrm,
+ MMX_PADDUSWrm,
+ MMX_PAVGBrm,
+ MMX_PAVGWrm,
+ MMX_PCMPEQBrm,
+ MMX_PCMPEQDrm,
+ MMX_PCMPEQWrm,
+ MMX_PCMPGTBrm,
+ MMX_PCMPGTDrm,
+ MMX_PCMPGTWrm,
+ MMX_PMAXSWrm,
+ MMX_PMAXUBrm,
+ MMX_PMINSWrm,
+ MMX_PMINUBrm,
+ MMX_PSUBSBrm,
+ MMX_PSUBSWrm,
+ MMX_PSUBUSBrm,
+ MMX_PSUBUSWrm)>;
def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let Latency = 6;
@@ -1383,9 +1383,9 @@ def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWirm,
- MMX_PACKSSWBirm,
- MMX_PACKUSWBirm)>;
+def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWrm,
+ MMX_PACKSSWBrm,
+ MMX_PACKUSWBrm)>;
def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
@@ -1675,7 +1675,7 @@ def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSirm)>;
+def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;
def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 9;
@@ -1701,19 +1701,7 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
"VPMAXSQZ128rm(b?)",
"VPMAXUQZ128rm(b?)",
"VPMINSQZ128rm(b?)",
- "VPMINUQZ128rm(b?)",
- "VPMOVSXBDZ128rm(b?)",
- "VPMOVSXBQZ128rm(b?)",
- "VPMOVSXBWZ128rm(b?)",
- "VPMOVSXDQZ128rm(b?)",
- "VPMOVSXWDZ128rm(b?)",
- "VPMOVSXWQZ128rm(b?)",
- "VPMOVZXBDZ128rm(b?)",
- "VPMOVZXBQZ128rm(b?)",
- "VPMOVZXBWZ128rm(b?)",
- "VPMOVZXDQZ128rm(b?)",
- "VPMOVZXWDZ128rm(b?)",
- "VPMOVZXWQZ128rm(b?)")>;
+ "VPMINUQZ128rm(b?)")>;
def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
@@ -1745,7 +1733,7 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
+def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
"(V?)CVTPS2PDrm")>;
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
@@ -1942,8 +1930,8 @@ def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2PSrm,
CVTPD2DQrm,
CVTTPD2DQrm,
- MMX_CVTPD2PIirm,
- MMX_CVTTPD2PIirm)>;
+ MMX_CVTPD2PIrm,
+ MMX_CVTTPD2PIrm)>;
def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 11;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 6fd98280f560..0fedfc01092c 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -320,30 +320,30 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [7,7], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [8,8], [9,9], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [7,7], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [8,8],[10,10], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [7,7], [8,8], 4, 5>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
-defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [6,6], 3, 1>;
+defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 3>;
+defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7], 3, 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
-defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7], 4, 5>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
-defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
-defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [6,6], [7,7]>;
+defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 10, 11,[10,10],[12,12], 3, 4>;
+defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 11, 12,[11,11],[12,12], 4, 5>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
@@ -525,8 +525,8 @@ def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5];
}
-def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
- MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
+def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSrr, MMX_CVTPI2PSrm,
+ MMX_CVTPS2PIrr, MMX_CVTTPS2PIrr)>;
// Port0 and Port1
def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
@@ -547,9 +547,43 @@ def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5, 5];
}
-def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
+def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIrm, MMX_CVTTPS2PIrm)>;
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
+def AtomWrite0_1_7 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+ let Latency = 7;
+ let ResourceCycles = [6,6];
+}
+def : InstRW<[AtomWrite0_1_7], (instregex "CVTSI642SDrm(_Int)?")>;
+
+def AtomWrite0_1_7_4 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+ let Latency = 7;
+ let ResourceCycles = [8,8];
+ let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_7_4], (instregex "CVTSI642SSrr(_Int)?")>;
+
+def AtomWrite0_1_8_4 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+ let Latency = 8;
+ let ResourceCycles = [8,8];
+ let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_7_4], (instregex "CVTSI642SSrm(_Int)?")>;
+
+def AtomWrite0_1_9 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+ let Latency = 9;
+ let ResourceCycles = [9,9];
+ let NumMicroOps = 4;
+}
+def : InstRW<[AtomWrite0_1_9], (instregex "CVT(T)?SS2SI64rr(_Int)?")>;
+
+def AtomWrite0_1_10 : SchedWriteRes<[AtomPort0,AtomPort1]> {
+ let Latency = 10;
+ let ResourceCycles = [11,11];
+ let NumMicroOps = 5;
+}
+def : InstRW<[AtomWrite0_1_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
+
// Port0 or Port1
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
let Latency = 1;
@@ -570,7 +604,7 @@ def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
- "MMX_P(ADD|SUB)Qirr",
+ "MMX_P(ADD|SUB)Qrr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
@@ -589,7 +623,7 @@ def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
"XCHG(8|16|32|64)rm",
"PH(ADD|SUB)Drr",
"MOV(S|Z)X16rm8",
- "MMX_P(ADD|SUB)Qirm",
+ "MMX_P(ADD|SUB)Qrm",
"MOV(UPS|UPD|DQU)rm",
"P(ADD|SUB)Qrm")>;
@@ -647,15 +681,13 @@ def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
-def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
- "CVT(T)?SS2SI64rr(_Int)?")>;
+def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
-def : InstRW<[AtomWrite01_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index 4c16b5b52b1d..0f6f24f9f1fe 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -1008,11 +1008,11 @@ defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8,
defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
-def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
+def PdWriteMMX_CVTTPD2PIrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 6;
let NumMicroOps = 2;
}
-def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;
+def : InstRW<[PdWriteMMX_CVTTPD2PIrr], (instrs MMX_CVTTPD2PIrr)>;
// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
@@ -1048,18 +1048,18 @@ defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8,
defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
-def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
+def PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 6;
let NumMicroOps = 2;
}
-def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
- MMX_CVTPI2PDirr)>;
+def : InstRW<[PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr], (instrs MMX_CVTPD2PIrr,
+ MMX_CVTPI2PDrr)>;
-def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
+def PdWriteMMX_CVTPI2PSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 4;
let NumMicroOps = 2;
}
-def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
+def : InstRW<[PdWriteMMX_CVTPI2PSrr], (instrs MMX_CVTPI2PSrr)>;
defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
@@ -1365,7 +1365,7 @@ def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
-def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
+def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORrr, MMX_PANDNrr)>;
def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
@@ -1378,11 +1378,11 @@ def PdWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
-def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
- MMX_PSUBQirr, MMX_PSUBWirr,
- MMX_PCMPGTBirr,
- MMX_PCMPGTDirr,
- MMX_PCMPGTWirr)>;
+def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBrr, MMX_PSUBDrr,
+ MMX_PSUBQrr, MMX_PSUBWrr,
+ MMX_PCMPGTBrr,
+ MMX_PCMPGTDrr,
+ MMX_PCMPGTWrr)>;
def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
@@ -1408,10 +1408,10 @@ def : IsZeroIdiomFunction<[
// MMX Zero-idioms.
DepBreakingClass<[
- MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
- MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
- MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
- MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
+ MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
+ MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
+ MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
], ZeroIdiomPredicate>,
// SSE Zero-idioms.
@@ -1449,7 +1449,7 @@ def : IsDepBreakingFunction<[
// MMX
DepBreakingClass<[
- MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ MMX_PCMPEQBrr, MMX_PCMPEQDrr, MMX_PCMPEQWrr
], ZeroIdiomPredicate>,
// SSE
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 68ebaa244acf..a070da34cab5 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -888,7 +888,7 @@ def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogic]>
]>;
-def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
+def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORrr, MMX_PANDNrr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
@@ -901,12 +901,12 @@ def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecALU]>
]>;
-def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
- MMX_PSUBQirr, MMX_PSUBWirr,
- MMX_PSUBSBirr, MMX_PSUBSWirr,
- MMX_PSUBUSBirr, MMX_PSUBUSWirr,
- MMX_PCMPGTBirr, MMX_PCMPGTDirr,
- MMX_PCMPGTWirr)>;
+def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBrr, MMX_PSUBDrr,
+ MMX_PSUBQrr, MMX_PSUBWrr,
+ MMX_PSUBSBrr, MMX_PSUBSWrr,
+ MMX_PSUBUSBrr, MMX_PSUBUSWrr,
+ MMX_PCMPGTBrr, MMX_PCMPGTDrr,
+ MMX_PCMPGTWrr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
@@ -974,10 +974,10 @@ def : IsZeroIdiomFunction<[
// MMX Zero-idioms.
DepBreakingClass<[
- MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
- MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
- MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
- MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
+ MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
+ MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
+ MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
], ZeroIdiomPredicate>,
// SSE Zero-idioms.
@@ -1017,7 +1017,7 @@ def : IsDepBreakingFunction<[
// MMX
DepBreakingClass<[
- MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ MMX_PCMPEQBrr, MMX_PCMPEQDrr, MMX_PCMPEQWrr
], ZeroIdiomPredicate>,
// SSE
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 5af9835f75a7..36e5b55a4194 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -467,8 +467,8 @@ def SLMWriteResGroup1rr : SchedWriteRes<[SLM_FPC_RSV01]> {
let NumMicroOps = 2;
let ResourceCycles = [8];
}
-def: InstRW<[SLMWriteResGroup1rr], (instrs MMX_PADDQirr, PADDQrr,
- MMX_PSUBQirr, PSUBQrr,
+def: InstRW<[SLMWriteResGroup1rr], (instrs MMX_PADDQrr, PADDQrr,
+ MMX_PSUBQrr, PSUBQrr,
PCMPEQQrr)>;
def SLMWriteResGroup1rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV01]> {
@@ -476,8 +476,8 @@ def SLMWriteResGroup1rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV01]> {
let NumMicroOps = 3;
let ResourceCycles = [1,8];
}
-def: InstRW<[SLMWriteResGroup1rm], (instrs MMX_PADDQirm, PADDQrm,
- MMX_PSUBQirm, PSUBQrm,
+def: InstRW<[SLMWriteResGroup1rm], (instrs MMX_PADDQrm, PADDQrm,
+ MMX_PSUBQrm, PSUBQrm,
PCMPEQQrm)>;
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 8e30e5e10ca8..4343e1ed45d1 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -1000,12 +1000,12 @@ def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> {
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteFPU12], (instrs MMX_PACKSSDWirr,
- MMX_PACKSSWBirr,
- MMX_PACKUSWBirr)>;
-def : InstRW<[ZnWriteFPU12m], (instrs MMX_PACKSSDWirm,
- MMX_PACKSSWBirm,
- MMX_PACKUSWBirm)>;
+def : InstRW<[ZnWriteFPU12], (instrs MMX_PACKSSDWrr,
+ MMX_PACKSSWBrr,
+ MMX_PACKUSWBrr)>;
+def : InstRW<[ZnWriteFPU12m], (instrs MMX_PACKSSDWrm,
+ MMX_PACKSSWBrm,
+ MMX_PACKUSWBrm)>;
def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ;
def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {
@@ -1305,15 +1305,15 @@ def ZnWriteCVTPS2PIr: SchedWriteRes<[ZnFPU3]> {
}
// CVT(T)PS2PI.
// mm,x.
-def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;
+def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIrr")>;
// CVTPI2PD.
// x,mm.
-def : InstRW<[ZnWriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
+def : InstRW<[ZnWriteCVTPS2PDr], (instrs MMX_CVTPI2PDrr)>;
// CVT(T)PD2PI.
// mm,x.
-def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
+def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIrr")>;
def ZnWriteCVSTSI2SSr: SchedWriteRes<[ZnFPU3]> {
let Latency = 5;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index a83c89e2f28a..96d2837880c7 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -1012,12 +1012,12 @@ def Zn2WriteFPU12Ym : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
let NumMicroOps = 2;
}
-def : InstRW<[Zn2WriteFPU12], (instrs MMX_PACKSSDWirr,
- MMX_PACKSSWBirr,
- MMX_PACKUSWBirr)>;
-def : InstRW<[Zn2WriteFPU12m], (instrs MMX_PACKSSDWirm,
- MMX_PACKSSWBirm,
- MMX_PACKUSWBirm)>;
+def : InstRW<[Zn2WriteFPU12], (instrs MMX_PACKSSDWrr,
+ MMX_PACKSSWBrr,
+ MMX_PACKUSWBrr)>;
+def : InstRW<[Zn2WriteFPU12m], (instrs MMX_PACKSSDWrm,
+ MMX_PACKSSWBrm,
+ MMX_PACKUSWBrm)>;
def Zn2WriteFPU013 : SchedWriteRes<[Zn2FPU013]> ;
def Zn2WriteFPU013Y : SchedWriteRes<[Zn2FPU013]> ;
@@ -1304,15 +1304,15 @@ def Zn2WriteCVTPS2PIr: SchedWriteRes<[Zn2FPU3]> {
}
// CVT(T)PS2PI.
// mm,x.
-def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;
+def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIrr")>;
// CVTPI2PD.
// x,mm.
-def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
+def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDrr)>;
// CVT(T)PD2PI.
// mm,x.
-def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
+def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIrr")>;
def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> {
let Latency = 3;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index be07c069aae1..f4e03ac11f0b 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -1075,9 +1075,9 @@ def Zn3WriteVecALUXMMX : SchedWriteRes<[Zn3FPVAdd01]> {
}
def : InstRW<[Zn3WriteVecALUXMMX], (instrs MMX_PABSBrr, MMX_PABSDrr, MMX_PABSWrr,
MMX_PSIGNBrr, MMX_PSIGNDrr, MMX_PSIGNWrr,
- MMX_PADDSBirr, MMX_PADDSWirr, MMX_PADDUSBirr, MMX_PADDUSWirr,
- MMX_PAVGBirr, MMX_PAVGWirr,
- MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr)>;
+ MMX_PADDSBrr, MMX_PADDSWrr, MMX_PADDUSBrr, MMX_PADDUSWrr,
+ MMX_PAVGBrr, MMX_PAVGWrr,
+ MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr)>;
defm : Zn3WriteResYMMPair<WriteVecALUY, [Zn3FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).
@@ -1161,7 +1161,7 @@ def Zn3WriteCvtPD2IMMX : SchedWriteRes<[Zn3FPFCvt01]> {
let ResourceCycles = [2];
let NumMicroOps = 2;
}
-def : InstRW<[Zn3WriteCvtPD2IMMX], (instrs MMX_CVTPD2PIirm, MMX_CVTTPD2PIirm, MMX_CVTPD2PIirr, MMX_CVTTPD2PIirr)>;
+def : InstRW<[Zn3WriteCvtPD2IMMX], (instrs MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm, MMX_CVTPD2PIrr, MMX_CVTTPD2PIrr)>;
defm : Zn3WriteResXMMPair<WriteCvtSS2I, [Zn3FPFCvt01], 2, [2], 2>; // Float -> Integer.
@@ -1179,7 +1179,7 @@ def Zn3WriteCvtI2PDMMX : SchedWriteRes<[Zn3FPFCvt01]> {
let ResourceCycles = [6];
let NumMicroOps = 2;
}
-def : InstRW<[Zn3WriteCvtI2PDMMX], (instrs MMX_CVTPI2PDirm, MMX_CVTPI2PDirr)>;
+def : InstRW<[Zn3WriteCvtI2PDMMX], (instrs MMX_CVTPI2PDrm, MMX_CVTPI2PDrr)>;
defm : Zn3WriteResXMMPair<WriteCvtI2SS, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Float.
defm : Zn3WriteResXMMPair<WriteCvtI2PS, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (XMM).
@@ -1191,7 +1191,7 @@ def Zn3WriteCvtI2PSMMX : SchedWriteRes<[Zn3FPFCvt01]> {
let ResourceCycles = [1];
let NumMicroOps = 2;
}
-def : InstRW<[Zn3WriteCvtI2PSMMX], (instrs MMX_CVTPI2PSirr)>;
+def : InstRW<[Zn3WriteCvtI2PSMMX], (instrs MMX_CVTPI2PSrr)>;
defm : Zn3WriteResXMMPair<WriteCvtSS2SD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conversion.
defm : Zn3WriteResXMMPair<WriteCvtPS2PD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conversion (XMM).
@@ -1621,7 +1621,7 @@ def : IsDepBreakingFunction<[
// MMX
DepBreakingClass<[
- MMX_PCMPEQBirr, MMX_PCMPEQWirr, MMX_PCMPEQDirr
+ MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
], ZeroIdiomPredicate>,
// SSE
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 9da54dc2e9b7..5d773f0c57df 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -958,8 +958,7 @@ public:
// extended frames should be flagged as present.
const Triple &TT = getTargetTriple();
- unsigned Major, Minor, Micro;
- TT.getOSVersion(Major, Minor, Micro);
+ unsigned Major = TT.getOSVersion().getMajor();
switch(TT.getOS()) {
default:
return false;
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 336985f3bf9d..78bc5519c23f 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -588,6 +588,18 @@ void X86PassConfig::addPreEmitPass2() {
// Insert pseudo probe annotation for callsite profiling
addPass(createPseudoProbeInserter());
+
+ // On Darwin platforms, BLR_RVMARKER pseudo instructions are lowered to
+ // bundles.
+ if (TT.isOSDarwin())
+ addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
+ // Only run bundle expansion if there are relevant ObjC runtime functions
+ // present in the module.
+ const Function &F = MF.getFunction();
+ const Module *M = F.getParent();
+ return M->getFunction("objc_retainAutoreleasedReturnValue") ||
+ M->getFunction("objc_unsafeClaimAutoreleasedReturnValue");
+ }));
}
bool X86PassConfig::addPostFastRegAllocRewrite() {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 869762b35196..d8cd7311a0d5 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -236,47 +236,50 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
}
}
- if ((ISD == ISD::MUL || ISD == ISD::SDIV || ISD == ISD::SREM ||
- ISD == ISD::UDIV || ISD == ISD::UREM) &&
+ // Vector multiply by pow2 will be simplified to shifts.
+ if (ISD == ISD::MUL &&
(Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
- Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
- // Vector multiply by pow2 will be simplified to shifts.
- if (ISD == ISD::MUL) {
- InstructionCost Cost = getArithmeticInstrCost(
- Instruction::Shl, Ty, CostKind, Op1Info, Op2Info,
- TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
- return Cost;
- }
-
- if (ISD == ISD::SDIV || ISD == ISD::SREM) {
- // On X86, vector signed division by constants power-of-two are
- // normally expanded to the sequence SRA + SRL + ADD + SRA.
- // The OperandValue properties may not be the same as that of the previous
- // operation; conservatively assume OP_None.
- InstructionCost Cost =
- 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
- Op2Info, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
- Op2Info, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
- Op2Info, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2)
+ return getArithmeticInstrCost(Instruction::Shl, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
- if (ISD == ISD::SREM) {
- // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
- Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
- Op2Info);
- Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
- Op2Info);
- }
+ // On X86, vector signed division by constants power-of-two are
+ // normally expanded to the sequence SRA + SRL + ADD + SRA.
+ // The OperandValue properties may not be the same as that of the previous
+ // operation; conservatively assume OP_None.
+ if ((ISD == ISD::SDIV || ISD == ISD::SREM) &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+ InstructionCost Cost =
+ 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
- return Cost;
+ if (ISD == ISD::SREM) {
+ // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
+ Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
+ Op2Info);
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
+ Op2Info);
}
- // Vector unsigned division/remainder will be simplified to shifts/masks.
+ return Cost;
+ }
+
+ // Vector unsigned division/remainder will be simplified to shifts/masks.
+ if ((ISD == ISD::UDIV || ISD == ISD::UREM) &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
if (ISD == ISD::UDIV)
return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
@@ -660,6 +663,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::MUL, MVT::v8i32, 1 }, // pmulld (Skylake from agner.org)
{ ISD::MUL, MVT::v4i32, 1 }, // pmulld (Skylake from agner.org)
{ ISD::MUL, MVT::v8i64, 6 }, // 3*pmuludq/3*shift/2*add
+ { ISD::MUL, MVT::i64, 1 }, // Skylake from http://www.agner.org/
{ ISD::FNEG, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/
{ ISD::FADD, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/
@@ -5188,10 +5192,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
}
-bool X86TTIImpl::areFunctionArgsABICompatible(
- const Function *Caller, const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const {
- if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
+ const Function *Callee,
+ const ArrayRef<Type *> &Types) const {
+ if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
// If we get here, we know the target features match. If one function
@@ -5206,13 +5210,8 @@ bool X86TTIImpl::areFunctionArgsABICompatible(
// Consider the arguments compatible if they aren't vectors or aggregates.
// FIXME: Look at the size of vectors.
// FIXME: Look at the element types of aggregates to see if there are vectors.
- // FIXME: The API of this function seems intended to allow arguments
- // to be removed from the set, but the caller doesn't check if the set
- // becomes empty so that may not work in practice.
- return llvm::none_of(Args, [](Argument *A) {
- auto *EltTy = cast<PointerType>(A->getType())->getElementType();
- return EltTy->isVectorTy() || EltTy->isAggregateType();
- });
+ return llvm::none_of(Types,
+ [](Type *T) { return T->isVectorTy() || T->isAggregateType(); });
}
X86TTIImpl::TTI::MemCmpExpansionOptions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index c53424ec0026..11e9cb09c7d5 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -234,9 +234,8 @@ public:
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- bool areFunctionArgsABICompatible(const Function *Caller,
- const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const;
+ bool areTypesABICompatible(const Function *Caller, const Function *Callee,
+ const ArrayRef<Type *> &Type) const;
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool prefersVectorizedAddressing() const;
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index abac3f801a22..4624b735bef8 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -475,12 +475,12 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
// any of its operands, this way, when we get to the operand, we already
// removed the instructions (from the expression dag) that uses it.
CurrentTruncInst->eraseFromParent();
- for (auto I = InstInfoMap.rbegin(), E = InstInfoMap.rend(); I != E; ++I) {
+ for (auto &I : llvm::reverse(InstInfoMap)) {
// We still need to check that the instruction has no users before we erase
// it, because {SExt, ZExt}Inst Instruction might have other users that was
// not reduced, in such case, we need to keep that instruction.
- if (I->first->use_empty())
- I->first->eraseFromParent();
+ if (I.first->use_empty())
+ I.first->eraseFromParent();
}
}
diff --git a/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
index 96c083a144b2..5fc5295969d0 100644
--- a/llvm/lib/Transforms/CFGuard/CFGuard.cpp
+++ b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
@@ -165,6 +165,12 @@ void CFGuard::insertCFGuardCheck(CallBase *CB) {
IRBuilder<> B(CB);
Value *CalledOperand = CB->getCalledOperand();
+ // If the indirect call is called within catchpad or cleanuppad,
+ // we need to copy "funclet" bundle of the call.
+ SmallVector<llvm::OperandBundleDef, 1> Bundles;
+ if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet))
+ Bundles.push_back(OperandBundleDef(*Bundle));
+
// Load the global symbol as a pointer to the check function.
LoadInst *GuardCheckLoad = B.CreateLoad(GuardFnPtrType, GuardFnGlobal);
@@ -172,7 +178,7 @@ void CFGuard::insertCFGuardCheck(CallBase *CB) {
// even if the original CallBase is an Invoke or CallBr instruction.
CallInst *GuardCheck =
B.CreateCall(GuardFnType, GuardCheckLoad,
- {B.CreateBitCast(CalledOperand, B.getInt8PtrTy())});
+ {B.CreateBitCast(CalledOperand, B.getInt8PtrTy())}, Bundles);
// Ensure that the first argument is passed in the correct register
// (e.g. ECX on 32-bit X86 targets).
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index ac3d078714ce..a0d12865bd3a 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1237,8 +1237,10 @@ namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
using Base = PtrUseVisitor<AllocaUseVisitor>;
AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
- const CoroBeginInst &CB, const SuspendCrossingInfo &Checker)
- : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}
+ const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
+ bool ShouldUseLifetimeStartInfo)
+ : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
+ ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
void visit(Instruction &I) {
Users.insert(&I);
@@ -1390,6 +1392,7 @@ private:
SmallPtrSet<Instruction *, 4> Users{};
SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
bool MayWriteBeforeCoroBegin{false};
+ bool ShouldUseLifetimeStartInfo{true};
mutable llvm::Optional<bool> ShouldLiveOnFrame{};
@@ -1398,7 +1401,7 @@ private:
// more precise. We look at every pair of lifetime.start intrinsic and
// every basic block that uses the pointer to see if they cross suspension
// points. The uses cover both direct uses as well as indirect uses.
- if (!LifetimeStarts.empty()) {
+ if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
for (auto *I : Users)
for (auto *S : LifetimeStarts)
if (Checker.isDefinitionAcrossSuspend(*S, I))
@@ -2484,8 +2487,15 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
continue;
}
DominatorTree DT(F);
+ // The code that uses lifetime.start intrinsic does not work for functions
+ // with loops without exit. Disable it on ABIs we know to generate such
+ // code.
+ bool ShouldUseLifetimeStartInfo =
+ (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
+ Shape.ABI != coro::ABI::RetconOnce);
AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
- *Shape.CoroBegin, Checker};
+ *Shape.CoroBegin, Checker,
+ ShouldUseLifetimeStartInfo};
Visitor.visitPtr(*AI);
if (!Visitor.getShouldLiveOnFrame())
continue;
@@ -2572,9 +2582,15 @@ void coro::salvageDebugInfo(
DVI->setExpression(Expr);
/// It makes no sense to move the dbg.value intrinsic.
if (!isa<DbgValueInst>(DVI)) {
- if (auto *InsertPt = dyn_cast<Instruction>(Storage))
+ if (auto *II = dyn_cast<InvokeInst>(Storage))
+ DVI->moveBefore(II->getNormalDest()->getFirstNonPHI());
+ else if (auto *CBI = dyn_cast<CallBrInst>(Storage))
+ DVI->moveBefore(CBI->getDefaultDest()->getFirstNonPHI());
+ else if (auto *InsertPt = dyn_cast<Instruction>(Storage)) {
+ assert(!InsertPt->isTerminator() &&
+ "Unimaged terminator that could return a storage.");
DVI->moveAfter(InsertPt);
- else if (isa<Argument>(Storage))
+ } else if (isa<Argument>(Storage))
DVI->moveAfter(F->getEntryBlock().getFirstNonPHI());
}
}
@@ -2664,7 +2680,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
}
- sinkLifetimeStartMarkers(F, Shape, Checker);
+ if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
+ Shape.ABI != coro::ABI::RetconOnce)
+ sinkLifetimeStartMarkers(F, Shape, Checker);
+
if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index fa1d92f439b8..12c1829524ef 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -280,6 +280,27 @@ static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
BB->getTerminator()->eraseFromParent();
}
+// Mark a coroutine as done, which implies that the coroutine is finished and
+// will never be resumed.
+//
+// In resume-switched ABI, the done state is represented by storing zero in
+// ResumeFnAddr.
+//
+// NOTE: We cannot omit the argument `FramePtr`. It is necessary because the
+// pointer to the frame in the split function is not stored in `Shape`.
+static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
+ Value *FramePtr) {
+ assert(
+ Shape.ABI == coro::ABI::Switch &&
+ "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
+ auto *GepIndex = Builder.CreateStructGEP(
+ Shape.FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Resume,
+ "ResumeFn.addr");
+ auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
+ Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
+ Builder.CreateStore(NullPtr, GepIndex);
+}
+
/// Replace an unwind call to llvm.coro.end.
static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
Value *FramePtr, bool InResume,
@@ -288,10 +309,18 @@ static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
switch (Shape.ABI) {
// In switch-lowering, this does nothing in the main function.
- case coro::ABI::Switch:
+ case coro::ABI::Switch: {
+ // In C++'s specification, the coroutine should be marked as done
+ // if promise.unhandled_exception() throws. The frontend will
+ // call coro.end(true) along this path.
+ //
+ // FIXME: We should refactor this once there are other languages
+ // that use the Switch-Resumed style other than C++.
+ markCoroutineAsDone(Builder, Shape, FramePtr);
if (!InResume)
return;
break;
+ }
// In async lowering this does nothing.
case coro::ABI::Async:
break;
@@ -364,13 +393,9 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
auto *Save = S->getCoroSave();
Builder.SetInsertPoint(Save);
if (S->isFinal()) {
- // Final suspend point is represented by storing zero in ResumeFnAddr.
- auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr,
- coro::Shape::SwitchFieldIndex::Resume,
- "ResumeFn.addr");
- auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
- FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
- Builder.CreateStore(NullPtr, GepIndex);
+ // The coroutine should be marked done if it reaches the final suspend
+ // point.
+ markCoroutineAsDone(Builder, Shape, FramePtr);
} else {
auto *GepIndex = Builder.CreateStructGEP(
FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index e4883ef89db7..fba8b03e44ba 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -141,7 +141,6 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
"llvm.coro.id.retcon",
"llvm.coro.id.retcon.once",
"llvm.coro.noop",
- "llvm.coro.param",
"llvm.coro.prepare.async",
"llvm.coro.prepare.retcon",
"llvm.coro.promise",
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 93bb11433775..3a42a2cac928 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -835,14 +835,20 @@ bool ArgumentPromotionPass::areFunctionArgsABICompatible(
const Function &F, const TargetTransformInfo &TTI,
SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
+ // TODO: Check individual arguments so we can promote a subset?
+ SmallVector<Type *, 32> Types;
+ for (Argument *Arg : ArgsToPromote)
+ Types.push_back(Arg->getType()->getPointerElementType());
+ for (Argument *Arg : ByValArgsToTransform)
+ Types.push_back(Arg->getParamByValType());
+
for (const Use &U : F.uses()) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
if (!CB)
return false;
const Function *Caller = CB->getCaller();
const Function *Callee = CB->getCalledFunction();
- if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
- !TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform))
+ if (!TTI.areTypesABICompatible(Caller, Callee, Types))
return false;
}
return true;
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index edadc79e3a9f..7e729e57153c 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2139,12 +2139,10 @@ bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
bool Result = true;
#ifndef NDEBUG
if (SeedAllowList.size() != 0)
- Result =
- std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName());
+ Result = llvm::is_contained(SeedAllowList, AA.getName());
Function *Fn = AA.getAnchorScope();
if (FunctionSeedAllowList.size() != 0 && Fn)
- Result &= std::count(FunctionSeedAllowList.begin(),
- FunctionSeedAllowList.end(), Fn->getName());
+ Result &= llvm::is_contained(FunctionSeedAllowList, Fn->getName());
#endif
return Result;
}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index ec08287393de..b977821bcaa6 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -417,7 +417,7 @@ const Value *stripAndAccumulateMinimalOffsets(
AttributorAnalysis);
}
-static const Value *getMinimalBaseOfAccsesPointerOperand(
+static const Value *getMinimalBaseOfAccessPointerOperand(
Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I,
int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) {
const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
@@ -2129,7 +2129,7 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
int64_t Offset;
const Value *Base =
- getMinimalBaseOfAccsesPointerOperand(A, QueryingAA, I, Offset, DL);
+ getMinimalBaseOfAccessPointerOperand(A, QueryingAA, I, Offset, DL);
if (Base) {
if (Base == &AssociatedValue &&
getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
@@ -6414,31 +6414,36 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
return indicatePessimisticFixpoint();
}
+ // Collect the types that will replace the privatizable type in the function
+ // signature.
+ SmallVector<Type *, 16> ReplacementTypes;
+ identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+
// Verify callee and caller agree on how the promoted argument would be
// passed.
- // TODO: The use of the ArgumentPromotion interface here is ugly, we need a
- // specialized form of TargetTransformInfo::areFunctionArgsABICompatible
- // which doesn't require the arguments ArgumentPromotion wanted to pass.
Function &Fn = *getIRPosition().getAnchorScope();
- SmallPtrSet<Argument *, 1> ArgsToPromote, Dummy;
- ArgsToPromote.insert(getAssociatedArgument());
const auto *TTI =
A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(Fn);
- if (!TTI ||
- !ArgumentPromotionPass::areFunctionArgsABICompatible(
- Fn, *TTI, ArgsToPromote, Dummy) ||
- ArgsToPromote.empty()) {
+ if (!TTI) {
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Missing TTI for function "
+ << Fn.getName() << "\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ auto CallSiteCheck = [&](AbstractCallSite ACS) {
+ CallBase *CB = ACS.getInstruction();
+ return TTI->areTypesABICompatible(
+ CB->getCaller(), CB->getCalledFunction(), ReplacementTypes);
+ };
+ bool AllCallSitesKnown;
+ if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
+ AllCallSitesKnown)) {
LLVM_DEBUG(
dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for "
<< Fn.getName() << "\n");
return indicatePessimisticFixpoint();
}
- // Collect the types that will replace the privatizable type in the function
- // signature.
- SmallVector<Type *, 16> ReplacementTypes;
- identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
-
// Register a rewrite of the argument.
Argument *Arg = getAssociatedArgument();
if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) {
@@ -6558,7 +6563,6 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
return false;
};
- bool AllCallSitesKnown;
if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true,
AllCallSitesKnown))
return indicatePessimisticFixpoint();
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index cde78713b554..321d4a19a585 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -76,6 +76,7 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
+STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
@@ -580,16 +581,8 @@ struct ArgumentUsesTracker : public CaptureTracker {
return true;
}
- // Note: the callee and the two successor blocks *follow* the argument
- // operands. This means there is no need to adjust UseIndex to account for
- // these.
-
- unsigned UseIndex =
- std::distance(const_cast<const Use *>(CB->arg_begin()), U);
-
- assert(UseIndex < CB->data_operands_size() &&
- "Indirect function calls should have been filtered above!");
-
+ assert(!CB->isCallee(U) && "callee operand reported captured?");
+ const unsigned UseIndex = CB->getDataOperandNo(U);
if (UseIndex >= CB->arg_size()) {
// Data operand, but not a argument operand -- must be a bundle operand
assert(CB->hasOperandBundles() && "Must be!");
@@ -649,8 +642,8 @@ struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
-determinePointerReadAttrs(Argument *A,
- const SmallPtrSet<Argument *, 8> &SCCNodes) {
+determinePointerAccessAttrs(Argument *A,
+ const SmallPtrSet<Argument *, 8> &SCCNodes) {
SmallVector<Use *, 32> Worklist;
SmallPtrSet<Use *, 32> Visited;
@@ -659,7 +652,7 @@ determinePointerReadAttrs(Argument *A,
return Attribute::None;
bool IsRead = false;
- // We don't need to track IsWritten. If A is written to, return immediately.
+ bool IsWrite = false;
for (Use &U : A->uses()) {
Visited.insert(&U);
@@ -667,6 +660,10 @@ determinePointerReadAttrs(Argument *A,
}
while (!Worklist.empty()) {
+ if (IsWrite && IsRead)
+ // No point in searching further.
+ return Attribute::None;
+
Use *U = Worklist.pop_back_val();
Instruction *I = cast<Instruction>(U->getUser());
@@ -684,73 +681,49 @@ determinePointerReadAttrs(Argument *A,
case Instruction::Call:
case Instruction::Invoke: {
- bool Captures = true;
+ CallBase &CB = cast<CallBase>(*I);
+ if (CB.isCallee(U)) {
+ IsRead = true;
+ // Note that indirect calls do not capture, see comment in
+ // CaptureTracking for context
+ continue;
+ }
- if (I->getType()->isVoidTy())
- Captures = false;
+ // Given we've explicitly handled the callee operand above, what's left
+ // must be a data operand (e.g. an argument or operand bundle).
+ const unsigned UseIndex = CB.getDataOperandNo(U);
- auto AddUsersToWorklistIfCapturing = [&] {
- if (Captures)
+ if (!CB.doesNotCapture(UseIndex)) {
+ if (!CB.onlyReadsMemory())
+ // If the callee can save a copy into other memory, then simply
+ // scanning uses of the call is insufficient. We have no way
+ // of tracking copies of the pointer through memory to see
+ // if a reloaded copy is written to, thus we must give up.
+ return Attribute::None;
+ // Push users for processing once we finish this one
+ if (!I->getType()->isVoidTy())
for (Use &UU : I->uses())
if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
- };
-
- CallBase &CB = cast<CallBase>(*I);
- if (CB.doesNotAccessMemory()) {
- AddUsersToWorklistIfCapturing();
- continue;
}
+
+ if (CB.doesNotAccessMemory())
+ continue;
- Function *F = CB.getCalledFunction();
- if (!F) {
- if (CB.onlyReadsMemory()) {
- IsRead = true;
- AddUsersToWorklistIfCapturing();
- continue;
- }
- return Attribute::None;
- }
-
- // Note: the callee and the two successor blocks *follow* the argument
- // operands. This means there is no need to adjust UseIndex to account
- // for these.
-
- unsigned UseIndex = std::distance(CB.arg_begin(), U);
-
- // U cannot be the callee operand use: since we're exploring the
- // transitive uses of an Argument, having such a use be a callee would
- // imply the call site is an indirect call or invoke; and we'd take the
- // early exit above.
- assert(UseIndex < CB.data_operands_size() &&
- "Data operand use expected!");
-
- bool IsOperandBundleUse = UseIndex >= CB.arg_size();
+ if (Function *F = CB.getCalledFunction())
+ if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
+ SCCNodes.count(F->getArg(UseIndex)))
+ // This is an argument which is part of the speculative SCC. Note
+ // that only operands corresponding to formal arguments of the callee
+ // can participate in the speculation.
+ break;
- if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
- assert(F->isVarArg() && "More params than args in non-varargs call");
+ // The accessors used on call site here do the right thing for calls and
+ // invokes with operand bundles.
+ if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex))
return Attribute::None;
- }
-
- Captures &= !CB.doesNotCapture(UseIndex);
-
- // Since the optimizer (by design) cannot see the data flow corresponding
- // to a operand bundle use, these cannot participate in the optimistic SCC
- // analysis. Instead, we model the operand bundle uses as arguments in
- // call to a function external to the SCC.
- if (IsOperandBundleUse ||
- !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) {
-
- // The accessors used on call site here do the right thing for calls and
- // invokes with operand bundles.
-
- if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex))
- return Attribute::None;
- if (!CB.doesNotAccessMemory(UseIndex))
- IsRead = true;
- }
-
- AddUsersToWorklistIfCapturing();
+ if (!CB.doesNotAccessMemory(UseIndex))
+ IsRead = true;
break;
}
@@ -763,6 +736,19 @@ determinePointerReadAttrs(Argument *A,
IsRead = true;
break;
+ case Instruction::Store:
+ if (cast<StoreInst>(I)->getValueOperand() == *U)
+ // untrackable capture
+ return Attribute::None;
+
+ // A volatile store has side effects beyond what the writeonly
+ // attribute can describe, so give up.
+ if (cast<StoreInst>(I)->isVolatile())
+ return Attribute::None;
+
+ IsWrite = true;
+ break;
+
case Instruction::ICmp:
case Instruction::Ret:
break;
@@ -772,7 +758,14 @@ determinePointerReadAttrs(Argument *A,
}
}
- return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
+ if (IsWrite && IsRead)
+ return Attribute::None;
+ else if (IsRead)
+ return Attribute::ReadOnly;
+ else if (IsWrite)
+ return Attribute::WriteOnly;
+ else
+ return Attribute::ReadNone;
}
/// Deduce returned attributes for the SCC.
@@ -865,9 +858,10 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
return Changed;
}
-static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
- assert((R == Attribute::ReadOnly || R == Attribute::ReadNone)
- && "Must be a Read attribute.");
+static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
+ assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
+ R == Attribute::WriteOnly)
+ && "Must be an access attribute.");
assert(A && "Argument must not be null.");
// If the argument already has the attribute, nothing needs to be done.
@@ -880,7 +874,12 @@ static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
A->removeAttr(Attribute::ReadOnly);
A->removeAttr(Attribute::ReadNone);
A->addAttr(R);
- R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
+ if (R == Attribute::ReadOnly)
+ ++NumReadOnlyArg;
+ else if (R == Attribute::WriteOnly)
+ ++NumWriteOnlyArg;
+ else
+ ++NumReadNoneArg;
return true;
}
@@ -945,15 +944,15 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
// Otherwise, it's captured. Don't bother doing SCC analysis on it.
}
if (!HasNonLocalUses && !A->onlyReadsMemory()) {
- // Can we determine that it's readonly/readnone without doing an SCC?
- // Note that we don't allow any calls at all here, or else our result
- // will be dependent on the iteration order through the functions in the
- // SCC.
+ // Can we determine that it's readonly/readnone/writeonly without doing
+ // an SCC? Note that we don't allow any calls at all here, or else our
+ // result will be dependent on the iteration order through the
+ // functions in the SCC.
SmallPtrSet<Argument *, 8> Self;
Self.insert(&*A);
- Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
+ Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self);
if (R != Attribute::None)
- if (addReadAttr(A, R))
+ if (addAccessAttr(A, R))
Changed.insert(F);
}
}
@@ -979,6 +978,13 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(A->getParent());
+
+ // Infer the access attributes given the new nocapture one
+ SmallPtrSet<Argument *, 8> Self;
+ Self.insert(&*A);
+ Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self);
+ if (R != Attribute::None)
+ addAccessAttr(A, R);
}
continue;
}
@@ -1023,10 +1029,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
Changed.insert(A->getParent());
}
- // We also want to compute readonly/readnone. With a small number of false
- // negatives, we can assume that any pointer which is captured isn't going
- // to be provably readonly or readnone, since by definition we can't
- // analyze all uses of a captured pointer.
+ // We also want to compute readonly/readnone/writeonly. With a small number
+ // of false negatives, we can assume that any pointer which is captured
+ // isn't going to be provably readonly or readnone, since by definition
+ // we can't analyze all uses of a captured pointer.
//
// The false negatives happen when the pointer is captured by a function
// that promises readonly/readnone behaviour on the pointer, then the
@@ -1034,24 +1040,28 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
// Also, a readonly/readnone pointer may be returned, but returning a
// pointer is capturing it.
- Attribute::AttrKind ReadAttr = Attribute::ReadNone;
- for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
+ if (A == B)
+ return A;
+ if (A == Attribute::ReadNone)
+ return B;
+ if (B == Attribute::ReadNone)
+ return A;
+ return Attribute::None;
+ };
+
+ Attribute::AttrKind AccessAttr = Attribute::ReadNone;
+ for (unsigned i = 0, e = ArgumentSCC.size();
+ i != e && AccessAttr != Attribute::None; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes);
- if (K == Attribute::ReadNone)
- continue;
- if (K == Attribute::ReadOnly) {
- ReadAttr = Attribute::ReadOnly;
- continue;
- }
- ReadAttr = K;
- break;
+ Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
+ AccessAttr = meetAccessAttr(AccessAttr, K);
}
- if (ReadAttr != Attribute::None) {
+ if (AccessAttr != Attribute::None) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- if (addReadAttr(A, ReadAttr))
+ if (addAccessAttr(A, AccessAttr))
Changed.insert(A->getParent());
}
}
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index fbd083bb9bbf..2425646455bd 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -64,8 +64,8 @@ static cl::opt<unsigned> FuncSpecializationMaxIters(
cl::desc("The maximum number of iterations function specialization is run"),
cl::init(1));
-static cl::opt<unsigned> MaxConstantsThreshold(
- "func-specialization-max-constants", cl::Hidden,
+static cl::opt<unsigned> MaxClonesThreshold(
+ "func-specialization-max-clones", cl::Hidden,
cl::desc("The maximum number of clones allowed for a single function "
"specialization"),
cl::init(3));
@@ -92,6 +92,28 @@ static cl::opt<bool> EnableSpecializationForLiteralConstant(
cl::desc("Enable specialization of functions that take a literal constant "
"as an argument."));
+namespace {
+// Bookkeeping struct to pass data from the analysis and profitability phase
+// to the actual transform helper functions.
+struct ArgInfo {
+ Function *Fn; // The function to perform specialisation on.
+ Argument *Arg; // The Formal argument being analysed.
+ Constant *Const; // A corresponding actual constant argument.
+ InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
+
+ // Flag if this will be a partial specialization, in which case we will need
+ // to keep the original function around in addition to the added
+ // specializations.
+ bool Partial = false;
+
+ ArgInfo(Function *F, Argument *A, Constant *C, InstructionCost G)
+ : Fn(F), Arg(A), Const(C), Gain(G){};
+};
+} // Anonymous namespace
+
+using FuncList = SmallVectorImpl<Function *>;
+using ConstList = SmallVectorImpl<Constant *>;
+
// Helper to check if \p LV is either a constant or a constant
// range with a single element. This should cover exactly the same cases as the
// old ValueLatticeElement::isConstant() and is intended to be used in the
@@ -169,7 +191,7 @@ static Constant *getConstantStackValue(CallInst *Call, Value *Val,
// ret void
// }
//
-static void constantArgPropagation(SmallVectorImpl<Function *> &WorkList,
+static void constantArgPropagation(FuncList &WorkList,
Module &M, SCCPSolver &Solver) {
// Iterate over the argument tracked functions see if there
// are any new constant values for the call instruction via
@@ -254,40 +276,33 @@ public:
///
/// \returns true if at least one function is specialized.
bool
- specializeFunctions(SmallVectorImpl<Function *> &FuncDecls,
- SmallVectorImpl<Function *> &CurrentSpecializations) {
-
- // Attempt to specialize the argument-tracked functions.
+ specializeFunctions(FuncList &FuncDecls,
+ FuncList &CurrentSpecializations) {
bool Changed = false;
for (auto *F : FuncDecls) {
- if (specializeFunction(F, CurrentSpecializations)) {
- Changed = true;
- LLVM_DEBUG(dbgs() << "FnSpecialization: Can specialize this func.\n");
- } else {
+ if (!isCandidateFunction(F, CurrentSpecializations))
+ continue;
+
+ auto Cost = getSpecializationCost(F);
+ if (!Cost.isValid()) {
LLVM_DEBUG(
- dbgs() << "FnSpecialization: Cannot specialize this func.\n");
+ dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
+ continue;
}
- }
- for (auto *SpecializedFunc : CurrentSpecializations) {
- SpecializedFuncs.insert(SpecializedFunc);
-
- // Initialize the state of the newly created functions, marking them
- // argument-tracked and executable.
- if (SpecializedFunc->hasExactDefinition() &&
- !SpecializedFunc->hasFnAttribute(Attribute::Naked))
- Solver.addTrackedFunction(SpecializedFunc);
- Solver.addArgumentTrackedFunction(SpecializedFunc);
- FuncDecls.push_back(SpecializedFunc);
- Solver.markBlockExecutable(&SpecializedFunc->front());
+ auto ConstArgs = calculateGains(F, Cost);
+ if (ConstArgs.empty()) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
+ continue;
+ }
- // Replace the function arguments for the specialized functions.
- for (Argument &Arg : SpecializedFunc->args())
- if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg))
- LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: "
- << Arg.getName() << "\n");
+ for (auto &CA : ConstArgs) {
+ specializeFunction(CA, CurrentSpecializations);
+ Changed = true;
+ }
}
+ updateSpecializedFuncs(FuncDecls, CurrentSpecializations);
NumFuncSpecialized += NbFunctionsSpecialized;
return Changed;
}
@@ -333,15 +348,83 @@ private:
return Clone;
}
- /// This function decides whether to specialize function \p F based on the
- /// known constant values its arguments can take on. Specialization is
- /// performed on the first interesting argument. Specializations based on
- /// additional arguments will be evaluated on following iterations of the
- /// main IPSCCP solve loop. \returns true if the function is specialized and
- /// false otherwise.
- bool specializeFunction(Function *F,
- SmallVectorImpl<Function *> &Specializations) {
+ /// This function decides whether it's worthwhile to specialize function \p F
+ /// based on the known constant values its arguments can take on, i.e. it
+ /// calculates a gain and returns a list of actual arguments that are deemed
+ /// profitable to specialize. Specialization is performed on the first
+ /// interesting argument. Specializations based on additional arguments will
+ /// be evaluated on following iterations of the main IPSCCP solve loop.
+ SmallVector<ArgInfo> calculateGains(Function *F, InstructionCost Cost) {
+ SmallVector<ArgInfo> Worklist;
+ // Determine if we should specialize the function based on the values the
+ // argument can take on. If specialization is not profitable, we continue
+ // on to the next argument.
+ for (Argument &FormalArg : F->args()) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing arg: "
+ << FormalArg.getName() << "\n");
+ // Determine if this argument is interesting. If we know the argument can
+ // take on any constant values, they are collected in Constants. If the
+ // argument can only ever equal a constant value in Constants, the
+ // function will be completely specialized, and the IsPartial flag will
+ // be set to false by isArgumentInteresting (that function only adds
+ // values to the Constants list that are deemed profitable).
+ bool IsPartial = true;
+ SmallVector<Constant *> ActualConstArg;
+ if (!isArgumentInteresting(&FormalArg, ActualConstArg, IsPartial)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
+ continue;
+ }
+
+ for (auto *ActualArg : ActualConstArg) {
+ InstructionCost Gain =
+ ForceFunctionSpecialization
+ ? 1
+ : getSpecializationBonus(&FormalArg, ActualArg) - Cost;
+ if (Gain <= 0)
+ continue;
+ Worklist.push_back({F, &FormalArg, ActualArg, Gain});
+ }
+
+ if (Worklist.empty())
+ continue;
+
+ // Sort the candidates in descending order.
+ llvm::stable_sort(Worklist, [](const ArgInfo &L, const ArgInfo &R) {
+ return L.Gain > R.Gain;
+ });
+
+ // Truncate the worklist to 'MaxClonesThreshold' candidates if
+ // necessary.
+ if (Worklist.size() > MaxClonesThreshold) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: number of candidates exceed "
+ << "the maximum number of clones threshold.\n"
+ << "Truncating worklist to " << MaxClonesThreshold
+ << " candidates.\n");
+ Worklist.erase(Worklist.begin() + MaxClonesThreshold,
+ Worklist.end());
+ }
+
+ if (IsPartial || Worklist.size() < ActualConstArg.size())
+ for (auto &ActualArg : Worklist)
+ ActualArg.Partial = true;
+
+ LLVM_DEBUG(dbgs() << "Sorted list of candidates by gain:\n";
+ for (auto &C
+ : Worklist) {
+ dbgs() << "- Function = " << C.Fn->getName() << ", ";
+ dbgs() << "FormalArg = " << C.Arg->getName() << ", ";
+ dbgs() << "ActualArg = " << C.Const->getName() << ", ";
+ dbgs() << "Gain = " << C.Gain << "\n";
+ });
+
+ // FIXME: Only one argument per function.
+ break;
+ }
+ return Worklist;
+ }
+
+ bool isCandidateFunction(Function *F, FuncList &Specializations) {
// Do not specialize the cloned function again.
if (SpecializedFuncs.contains(F))
return false;
@@ -362,84 +445,32 @@ private:
LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
<< "\n");
+ return true;
+ }
- // Determine if it would be profitable to create a specialization of the
- // function where the argument takes on the given constant value. If so,
- // add the constant to Constants.
- auto FnSpecCost = getSpecializationCost(F);
- if (!FnSpecCost.isValid()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: func specialisation cost: ";
- FnSpecCost.print(dbgs()); dbgs() << "\n");
-
- // Determine if we should specialize the function based on the values the
- // argument can take on. If specialization is not profitable, we continue
- // on to the next argument.
- for (Argument &A : F->args()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing arg: " << A.getName()
- << "\n");
- // True if this will be a partial specialization. We will need to keep
- // the original function around in addition to the added specializations.
- bool IsPartial = true;
-
- // Determine if this argument is interesting. If we know the argument can
- // take on any constant values, they are collected in Constants. If the
- // argument can only ever equal a constant value in Constants, the
- // function will be completely specialized, and the IsPartial flag will
- // be set to false by isArgumentInteresting (that function only adds
- // values to the Constants list that are deemed profitable).
- SmallVector<Constant *, 4> Constants;
- if (!isArgumentInteresting(&A, Constants, FnSpecCost, IsPartial)) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
- continue;
- }
-
- assert(!Constants.empty() && "No constants on which to specialize");
- LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is interesting!\n"
- << "FnSpecialization: Specializing '" << F->getName()
- << "' on argument: " << A << "\n"
- << "FnSpecialization: Constants are:\n\n";
- for (unsigned I = 0; I < Constants.size(); ++I) dbgs()
- << *Constants[I] << "\n";
- dbgs() << "FnSpecialization: End of constants\n\n");
-
- // Create a version of the function in which the argument is marked
- // constant with the given value.
- for (auto *C : Constants) {
- // Clone the function. We leave the ValueToValueMap empty to allow
- // IPSCCP to propagate the constant arguments.
- Function *Clone = cloneCandidateFunction(F);
- Argument *ClonedArg = Clone->arg_begin() + A.getArgNo();
-
- // Rewrite calls to the function so that they call the clone instead.
- rewriteCallSites(F, Clone, *ClonedArg, C);
+ void specializeFunction(ArgInfo &AI, FuncList &Specializations) {
+ Function *Clone = cloneCandidateFunction(AI.Fn);
+ Argument *ClonedArg = Clone->getArg(AI.Arg->getArgNo());
- // Initialize the lattice state of the arguments of the function clone,
- // marking the argument on which we specialized the function constant
- // with the given value.
- Solver.markArgInFuncSpecialization(F, ClonedArg, C);
+ // Rewrite calls to the function so that they call the clone instead.
+ rewriteCallSites(AI.Fn, Clone, *ClonedArg, AI.Const);
- // Mark all the specialized functions
- Specializations.push_back(Clone);
- NbFunctionsSpecialized++;
- }
+ // Initialize the lattice state of the arguments of the function clone,
+ // marking the argument on which we specialized the function constant
+ // with the given value.
+ Solver.markArgInFuncSpecialization(AI.Fn, ClonedArg, AI.Const);
- // If the function has been completely specialized, the original function
- // is no longer needed. Mark it unreachable.
- if (!IsPartial)
- Solver.markFunctionUnreachable(F);
+ // Mark all the specialized functions
+ Specializations.push_back(Clone);
+ NbFunctionsSpecialized++;
- // FIXME: Only one argument per function.
- return true;
- }
-
- return false;
+ // If the function has been completely specialized, the original function
+ // is no longer needed. Mark it unreachable.
+ if (!AI.Partial)
+ Solver.markFunctionUnreachable(AI.Fn);
}
- /// Compute the cost of specializing function \p F.
+ /// Compute and return the cost of specializing function \p F.
InstructionCost getSpecializationCost(Function *F) {
// Compute the code metrics for the function.
SmallPtrSet<const Value *, 32> EphValues;
@@ -578,9 +609,7 @@ private:
///
/// \returns true if the function should be specialized on the given
/// argument.
- bool isArgumentInteresting(Argument *A,
- SmallVectorImpl<Constant *> &Constants,
- const InstructionCost &FnSpecCost,
+ bool isArgumentInteresting(Argument *A, ConstList &Constants,
bool &IsPartial) {
// For now, don't attempt to specialize functions based on the values of
// composite types.
@@ -608,42 +637,8 @@ private:
//
// TODO 2: this currently does not support constants, i.e. integer ranges.
//
- SmallVector<Constant *, 4> PossibleConstants;
- bool AllConstant = getPossibleConstants(A, PossibleConstants);
- if (PossibleConstants.empty()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
- return false;
- }
- if (PossibleConstants.size() > MaxConstantsThreshold) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: number of constants found exceed "
- << "the maximum number of constants threshold.\n");
- return false;
- }
-
- for (auto *C : PossibleConstants) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Constant: " << *C << "\n");
- if (ForceFunctionSpecialization) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Forced!\n");
- Constants.push_back(C);
- continue;
- }
- if (getSpecializationBonus(A, C) > FnSpecCost) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: profitable!\n");
- Constants.push_back(C);
- } else {
- LLVM_DEBUG(dbgs() << "FnSpecialization: not profitable\n");
- }
- }
-
- // None of the constant values the argument can take on were deemed good
- // candidates on which to specialize the function.
- if (Constants.empty())
- return false;
-
- // This will be a partial specialization if some of the constants were
- // rejected due to their profitability.
- IsPartial = !AllConstant || PossibleConstants.size() != Constants.size();
-
+ IsPartial = !getPossibleConstants(A, Constants);
+ LLVM_DEBUG(dbgs() << "FnSpecialization: interesting arg: " << *A << "\n");
return true;
}
@@ -653,8 +648,7 @@ private:
/// \returns true if all of the values the argument can take on are constant
/// (e.g., the argument's parent function cannot be called with an
/// overdefined value).
- bool getPossibleConstants(Argument *A,
- SmallVectorImpl<Constant *> &Constants) {
+ bool getPossibleConstants(Argument *A, ConstList &Constants) {
Function *F = A->getParent();
bool AllConstant = true;
@@ -681,7 +675,7 @@ private:
// For now, constant expressions are fine but only if they are function
// calls.
- if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
if (!isa<Function>(CE->getOperand(0)))
return false;
@@ -737,6 +731,29 @@ private:
}
}
}
+
+ void updateSpecializedFuncs(FuncList &FuncDecls,
+ FuncList &CurrentSpecializations) {
+ for (auto *SpecializedFunc : CurrentSpecializations) {
+ SpecializedFuncs.insert(SpecializedFunc);
+
+ // Initialize the state of the newly created functions, marking them
+ // argument-tracked and executable.
+ if (SpecializedFunc->hasExactDefinition() &&
+ !SpecializedFunc->hasFnAttribute(Attribute::Naked))
+ Solver.addTrackedFunction(SpecializedFunc);
+
+ Solver.addArgumentTrackedFunction(SpecializedFunc);
+ FuncDecls.push_back(SpecializedFunc);
+ Solver.markBlockExecutable(&SpecializedFunc->front());
+
+ // Replace the function arguments for the specialized functions.
+ for (Argument &Arg : SpecializedFunc->args())
+ if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg))
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: "
+ << Arg.getName() << "\n");
+ }
+ }
};
} // namespace
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index ba7589c2bf60..b1f3ff15c97b 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -305,8 +305,9 @@ static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
else if (auto *LI = dyn_cast<LoadInst>(U)) {
// A load from zeroinitializer is always zeroinitializer, regardless of
// any applied offset.
- if (Init->isNullValue()) {
- LI->replaceAllUsesWith(Constant::getNullValue(LI->getType()));
+ Type *Ty = LI->getType();
+ if (Init->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy()) {
+ LI->replaceAllUsesWith(Constant::getNullValue(Ty));
EraseFromParent(LI);
continue;
}
@@ -316,8 +317,7 @@ static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true);
if (PtrOp == GV) {
- if (auto *Value = ConstantFoldLoadFromConst(Init, LI->getType(),
- Offset, DL)) {
+ if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
LI->replaceAllUsesWith(Value);
EraseFromParent(LI);
}
@@ -368,8 +368,7 @@ static bool isSafeSROAGEP(User *U) {
return false;
}
- return llvm::all_of(U->users(),
- [](User *UU) { return isSafeSROAElementUse(UU); });
+ return llvm::all_of(U->users(), isSafeSROAElementUse);
}
/// Return true if the specified instruction is a safe user of a derived
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 833049d6896f..a964fcde0396 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -294,7 +294,7 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
// Find all incoming values from the outlining region.
int NumIncomingVals = 0;
for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
- if (find(Region, PN.getIncomingBlock(i)) != Region.end()) {
+ if (llvm::is_contained(Region, PN.getIncomingBlock(i))) {
++NumIncomingVals;
if (NumIncomingVals > 1) {
++NumSplitExitPhis;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 992c2b292e1e..4e3689f09536 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -856,6 +856,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (InlineHistoryID != -1 &&
inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ LLVM_DEBUG(dbgs() << "Skipping inlining due to history: "
+ << F.getName() << " -> " << Callee.getName() << "\n");
setInlineRemark(*CB, "recursive");
continue;
}
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index f78971f0e586..c0bb19e184d6 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1774,8 +1774,9 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
bool IsJumpTableCanonical) {
SmallSetVector<Constant *, 4> Constants;
for (Use &U : llvm::make_early_inc_range(Old->uses())) {
- // Skip block addresses
- if (isa<BlockAddress>(U.getUser()))
+ // Skip block addresses and no_cfi values, which refer to the function
+ // body instead of the jump table.
+ if (isa<BlockAddress, NoCFIValue>(U.getUser()))
continue;
// Skip direct calls to externally defined or non-dso_local functions
@@ -1802,7 +1803,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
}
void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
- Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); });
+ Old->replaceUsesWithIf(New, isDirectCall);
}
bool LowerTypeTestsModule::lower() {
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 055ee6b50296..f289e3ecc979 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3964,6 +3964,9 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPRTL___kmpc_master:
case OMPRTL___kmpc_end_master:
case OMPRTL___kmpc_barrier:
+ case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
+ case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
+ case OMPRTL___kmpc_nvptx_end_reduce_nowait:
break;
case OMPRTL___kmpc_distribute_static_init_4:
case OMPRTL___kmpc_distribute_static_init_4u:
@@ -4010,6 +4013,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
break;
case OMPRTL___kmpc_omp_task:
// We do not look into tasks right now, just give up.
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
ReachedUnknownParallelRegions.insert(&CB);
break;
@@ -4020,6 +4024,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
default:
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
// generally. However, they do not hide parallel regions.
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
break;
}
@@ -4079,6 +4084,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
SPMDCompatibilityTracker.insert(&CB);
break;
default:
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
}
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index bae9a1e27e75..7334bf695b67 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -32,7 +32,7 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
if (CalleeName.empty())
return getHottestChildContext(CallSite);
- uint64_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end())
return &It->second;
@@ -65,7 +65,8 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
ContextTrieNode &ContextTrieNode::moveToChildContext(
const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
uint32_t ContextFramesToRemove, bool DeleteNode) {
- uint64_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
+ uint64_t Hash =
+ FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite);
assert(!AllChildContext.count(Hash) && "Node to remove must exist");
LineLocation OldCallSite = NodeToMove.CallSiteLoc;
ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
@@ -108,7 +109,7 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
StringRef CalleeName) {
- uint64_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
// Note this essentially calls dtor and destroys that child context
AllChildContext.erase(Hash);
}
@@ -174,21 +175,9 @@ void ContextTrieNode::dumpTree() {
}
}
-uint64_t ContextTrieNode::nodeHash(StringRef ChildName,
- const LineLocation &Callsite) {
- // We still use child's name for child hash, this is
- // because for children of root node, we don't have
- // different line/discriminator, and we'll rely on name
- // to differentiate children.
- uint64_t NameHash = std::hash<std::string>{}(ChildName.str());
- uint64_t LocId =
- (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator;
- return NameHash + (LocId << 5) + LocId;
-}
-
ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
- uint64_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end()) {
assert(It->second.getFuncName() == CalleeName &&
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index b8fac9d47763..bc6051de90c4 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -467,6 +467,9 @@ protected:
void emitOptimizationRemarksForInlineCandidates(
const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
bool Hot);
+ void promoteMergeNotInlinedContextSamples(
+ DenseMap<CallBase *, const FunctionSamples *> NonInlinedCallSites,
+ const Function &F);
std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
void generateMDProfMetadata(Function &F);
@@ -485,7 +488,7 @@ protected:
std::unique_ptr<SampleContextTracker> ContextTracker;
/// Flag indicating whether input profile is context-sensitive
- bool ProfileIsCS = false;
+ bool ProfileIsCSFlat = false;
/// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
///
@@ -602,7 +605,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// call instruction should have 0 count.
// For CS profile, the callsite count of previously inlined callees is
// populated with the entry count of the callees.
- if (!ProfileIsCS)
+ if (!ProfileIsCSFlat)
if (const auto *CB = dyn_cast<CallBase>(&Inst))
if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
return 0;
@@ -641,7 +644,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
// call instruction should have 0 count.
// For CS profile, the callsite count of previously inlined callees is
// populated with the entry count of the callees.
- if (!ProfileIsCS)
+ if (!ProfileIsCSFlat)
if (const auto *CB = dyn_cast<CallBase>(&Inst))
if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
return 0;
@@ -695,7 +698,7 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
if (Function *Callee = Inst.getCalledFunction())
CalleeName = Callee->getName();
- if (ProfileIsCS)
+ if (ProfileIsCSFlat)
return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
const FunctionSamples *FS = findFunctionSamples(Inst);
@@ -727,7 +730,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
FunctionSamples::getGUID(R->getName());
};
- if (ProfileIsCS) {
+ if (ProfileIsCSFlat) {
auto CalleeSamples =
ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
if (CalleeSamples.empty())
@@ -780,7 +783,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
if (it.second) {
- if (ProfileIsCS)
+ if (ProfileIsCSFlat)
it.first->second = ContextTracker->getContextSamplesFor(DIL);
else
it.first->second =
@@ -1039,7 +1042,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
// For AutoFDO profile, retrieve candidate profiles by walking over
// the nested inlinee profiles.
- if (!ProfileIsCS) {
+ if (!ProfileIsCSFlat) {
Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
return;
}
@@ -1134,7 +1137,7 @@ bool SampleProfileLoader::inlineHotFunctions(
assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
"GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || ProfileIsCS)
+ if (FS->getEntrySamples() > 0 || ProfileIsCSFlat)
LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
Hot = true;
@@ -1156,11 +1159,9 @@ bool SampleProfileLoader::inlineHotFunctions(
}
for (CallBase *I : CIS) {
Function *CalledFunction = I->getCalledFunction();
- InlineCandidate Candidate = {
- I,
- LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
- : nullptr,
- 0 /* dummy count */, 1.0 /* dummy distribution factor */};
+ InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
+ 0 /* dummy count */,
+ 1.0 /* dummy distribution factor */};
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
@@ -1198,53 +1199,9 @@ bool SampleProfileLoader::inlineHotFunctions(
}
// For CS profile, profile for not inlined context will be merged when
- // base profile is being trieved
- if (ProfileIsCS)
- return Changed;
-
- // Accumulate not inlined callsite information into notInlinedSamples
- for (const auto &Pair : LocalNotInlinedCallSites) {
- CallBase *I = Pair.getFirst();
- Function *Callee = I->getCalledFunction();
- if (!Callee || Callee->isDeclaration())
- continue;
-
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
- I->getDebugLoc(), I->getParent())
- << "previous inlining not repeated: '"
- << ore::NV("Callee", Callee) << "' into '"
- << ore::NV("Caller", &F) << "'");
-
- ++NumCSNotInlined;
- const FunctionSamples *FS = Pair.getSecond();
- if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
- continue;
- }
-
- if (ProfileMergeInlinee) {
- // A function call can be replicated by optimizations like callsite
- // splitting or jump threading and the replicates end up sharing the
- // sample nested callee profile instead of slicing the original inlinee's
- // profile. We want to do merge exactly once by filtering out callee
- // profiles with a non-zero head sample count.
- if (FS->getHeadSamples() == 0) {
- // Use entry samples as head samples during the merge, as inlinees
- // don't have head samples.
- const_cast<FunctionSamples *>(FS)->addHeadSamples(
- FS->getEntrySamples());
-
- // Note that we have to do the merge right after processing function.
- // This allows OutlineFS's profile to be used for annotation during
- // top-down processing of functions' annotation.
- FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
- OutlineFS->merge(*FS);
- }
- } else {
- auto pair =
- notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
- pair.first->second.entryCount += FS->getEntrySamples();
- }
- }
+ // base profile is being retrieved.
+ if (!FunctionSamples::ProfileIsCSFlat)
+ promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
return Changed;
}
@@ -1285,7 +1242,7 @@ bool SampleProfileLoader::tryInlineCandidate(
InlinedCallSites->push_back(I);
}
- if (ProfileIsCS)
+ if (ProfileIsCSFlat)
ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
++NumCSInlined;
@@ -1430,7 +1387,6 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
- assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
// Profile symbol list is ignored when profile-sample-accurate is on.
@@ -1467,6 +1423,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
if (ExternalInlineAdvisor)
SizeLimit = std::numeric_limits<unsigned>::max();
+ DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
+
// Perform iterative BFS call site prioritized inlining
bool Changed = false;
while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
@@ -1521,6 +1479,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
}
ICPCount++;
Changed = true;
+ } else if (!ContextTracker) {
+ LocalNotInlinedCallSites.try_emplace(I, FS);
}
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
@@ -1532,6 +1492,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
CQueue.emplace(NewCandidate);
}
Changed = true;
+ } else if (!ContextTracker) {
+ LocalNotInlinedCallSites.try_emplace(I, Candidate.CalleeSamples);
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
@@ -1549,9 +1511,63 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
++NumCSInlinedHitGrowthLimit;
}
+ // For CS profile, profile for not inlined context will be merged when
+ // base profile is being retrieved.
+ if (!FunctionSamples::ProfileIsCSFlat)
+ promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
return Changed;
}
+void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
+ DenseMap<CallBase *, const FunctionSamples *> NonInlinedCallSites,
+ const Function &F) {
+ // Accumulate not inlined callsite information into notInlinedSamples
+ for (const auto &Pair : NonInlinedCallSites) {
+ CallBase *I = Pair.getFirst();
+ Function *Callee = I->getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ continue;
+
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
+ I->getDebugLoc(), I->getParent())
+ << "previous inlining not repeated: '"
+ << ore::NV("Callee", Callee) << "' into '"
+ << ore::NV("Caller", &F) << "'");
+
+ ++NumCSNotInlined;
+ const FunctionSamples *FS = Pair.getSecond();
+ if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
+ continue;
+ }
+
+ if (ProfileMergeInlinee) {
+ // A function call can be replicated by optimizations like callsite
+ // splitting or jump threading and the replicates end up sharing the
+ // sample nested callee profile instead of slicing the original
+ // inlinee's profile. We want to do merge exactly once by filtering out
+ // callee profiles with a non-zero head sample count.
+ if (FS->getHeadSamples() == 0) {
+ // Use entry samples as head samples during the merge, as inlinees
+ // don't have head samples.
+ const_cast<FunctionSamples *>(FS)->addHeadSamples(
+ FS->getEntrySamples());
+
+ // Note that we have to do the merge right after processing function.
+ // This allows OutlineFS's profile to be used for annotation during
+ // top-down processing of functions' annotation.
+ FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee);
+ OutlineFS->merge(*FS, 1);
+ // Set outlined profile to be synthetic to not bias the inliner.
+ OutlineFS->SetContextSynthetic();
+ }
+ } else {
+ auto pair =
+ notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+ pair.first->second.entryCount += FS->getEntrySamples();
+ }
+ }
+}
+
/// Returns the sorted CallTargetMap \p M by count in descending order.
static SmallVector<InstrProfValueData, 2>
GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) {
@@ -1607,7 +1623,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
// With CSSPGO all indirect call targets are counted torwards the
// original indirect call site in the profile, including both
// inlined and non-inlined targets.
- if (!FunctionSamples::ProfileIsCS) {
+ if (!FunctionSamples::ProfileIsCSFlat) {
if (const FunctionSamplesMap *M =
FS->findFunctionSamplesMapAt(CallSite)) {
for (const auto &NameFS : *M)
@@ -1754,7 +1770,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
}
DenseSet<GlobalValue::GUID> InlinedGUIDs;
- if (ProfileIsCS && CallsitePrioritizedInline)
+ if (CallsitePrioritizedInline)
Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
else
Changed |= inlineHotFunctions(F, InlinedGUIDs);
@@ -1782,7 +1798,7 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
std::unique_ptr<ProfiledCallGraph>
SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG;
- if (ProfileIsCS)
+ if (ProfileIsCSFlat)
ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
else
ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
@@ -1828,7 +1844,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
assert(&CG->getModule() == &M);
if (UseProfiledCallGraph ||
- (ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) {
+ (ProfileIsCSFlat && !UseProfiledCallGraph.getNumOccurrences())) {
// Use profiled call edges to augment the top-down order. There are cases
// that the top-down order computed based on the static call graph doesn't
// reflect real execution order. For example
@@ -1961,10 +1977,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
// Apply tweaks if context-sensitive profile is available.
- if (Reader->profileIsCS()) {
- ProfileIsCS = true;
- FunctionSamples::ProfileIsCS = true;
-
+ if (Reader->profileIsCSFlat() || Reader->profileIsCSNested()) {
+ ProfileIsCSFlat = Reader->profileIsCSFlat();
// Enable priority-base inliner and size inline by default for CSSPGO.
if (!ProfileSizeInline.getNumOccurrences())
ProfileSizeInline = true;
@@ -1982,10 +1996,15 @@ bool SampleProfileLoader::doInitialization(Module &M,
// Enable iterative-BFI by default for CSSPGO.
if (!UseIterativeBFIInference.getNumOccurrences())
UseIterativeBFIInference = true;
+ // Enable Profi by default for CSSPGO.
+ if (!SampleProfileUseProfi.getNumOccurrences())
+ SampleProfileUseProfi = true;
- // Tracker for profiles under different context
- ContextTracker = std::make_unique<SampleContextTracker>(
- Reader->getProfiles(), &GUIDToFuncNameMap);
+ if (FunctionSamples::ProfileIsCSFlat) {
+ // Tracker for profiles under different context
+ ContextTracker = std::make_unique<SampleContextTracker>(
+ Reader->getProfiles(), &GUIDToFuncNameMap);
+ }
}
// Load pseudo probe descriptors for probe-based function samples.
@@ -1994,7 +2013,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
if (!ProbeManager->moduleIsProbed(M)) {
const char *Msg =
"Pseudo-probe-based profile requires SampleProfileProbePass";
- Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
+ DS_Warning));
return false;
}
}
@@ -2062,7 +2082,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
}
// Account for cold calls not inlined....
- if (!ProfileIsCS)
+ if (!ProfileIsCSFlat)
for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
notInlinedCallInfo)
updateProfileCallee(pair.first, pair.second.entryCount);
@@ -2138,7 +2158,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
ORE = OwnedORE.get();
}
- if (ProfileIsCS)
+ if (ProfileIsCSFlat)
Samples = ContextTracker->getBaseSamplesFor(F);
else
Samples = Reader->getSamplesFor(F);
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 0cc1b37844f6..daaf6cbeb3fd 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -87,7 +87,8 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) {
// Create a local alias with the original name to avoid breaking
// references from inline assembly.
- std::string Alias = ".set " + OldName + "," + NewName + "\n";
+ std::string Alias =
+ ".lto_set_conditional " + OldName + "," + NewName + "\n";
ExportM.appendModuleInlineAsm(Alias);
}
}
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 61054e7ae46f..6acace1d9fd4 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -359,6 +359,36 @@ template <> struct DenseMapInfo<VTableSlotSummary> {
namespace {
+// Returns true if the function must be unreachable based on ValueInfo.
+//
+// In particular, identifies a function as unreachable in the following
+// conditions
+// 1) All summaries are live.
+// 2) All function summaries indicate it's unreachable
+bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
+ if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
+ // Returns false if ValueInfo is absent, or the summary list is empty
+ // (e.g., function declarations).
+ return false;
+ }
+
+ for (auto &Summary : TheFnVI.getSummaryList()) {
+ // Conservatively returns false if any non-live functions are seen.
+ // In general either all summaries should be live or all should be dead.
+ if (!Summary->isLive())
+ return false;
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
+ if (!FS->fflags().MustBeUnreachable)
+ return false;
+ }
+ // Do nothing if a non-function has the same GUID (which is rare).
+ // This is correct since non-function summaries are not relevant.
+ }
+ // All function summaries are live and all of them agree that the function is
+ // unreachable.
+ return true;
+}
+
// A virtual call site. VTable is the loaded virtual table pointer, and CS is
// the indirect virtual call.
struct VirtualCallSite {
@@ -562,10 +592,12 @@ struct DevirtModule {
void buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
+
bool
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
- uint64_t ByteOffset);
+ uint64_t ByteOffset,
+ ModuleSummaryIndex *ExportSummary);
void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
bool &IsExported);
@@ -640,6 +672,23 @@ struct DevirtModule {
bool run();
+ // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
+ //
+ // Caller guarantees that `ExportSummary` is not nullptr.
+ static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
+ ModuleSummaryIndex *ExportSummary);
+
+ // Returns true if the function definition must be unreachable.
+ //
+ // Note if this helper function returns true, `F` is guaranteed
+ // to be unreachable; if it returns false, `F` might still
+ // be unreachable but not covered by this helper function.
+ //
+ // Implementation-wise, if function definition is present, IR is analyzed; if
+ // not, look up function flags from ExportSummary as a fallback.
+ static bool mustBeUnreachableFunction(Function *const F,
+ ModuleSummaryIndex *ExportSummary);
+
// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
static bool
@@ -969,7 +1018,8 @@ void DevirtModule::buildTypeIdentifierMap(
bool DevirtModule::tryFindVirtualCallTargets(
std::vector<VirtualCallTarget> &TargetsForSlot,
- const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
+ const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
+ ModuleSummaryIndex *ExportSummary) {
for (const TypeMemberInfo &TM : TypeMemberInfos) {
if (!TM.Bits->GV->isConstant())
return false;
@@ -997,6 +1047,11 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (Fn->getName() == "__cxa_pure_virtual")
continue;
+ // We can disregard unreachable functions as possible call targets, as
+ // unreachable functions shouldn't be called.
+ if (mustBeUnreachableFunction(Fn, ExportSummary))
+ continue;
+
TargetsForSlot.push_back({Fn, &TM});
}
@@ -1053,6 +1108,9 @@ bool DevirtIndex::tryFindVirtualCallTargets(
if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
continue;
+ if (mustBeUnreachableFunction(VTP.FuncVI))
+ continue;
+
TargetsForSlot.push_back(VTP.FuncVI);
}
}
@@ -1744,7 +1802,7 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
NewGV->setSection(B.GV->getSection());
NewGV->setComdat(B.GV->getComdat());
- NewGV->setAlignment(MaybeAlign(B.GV->getAlignment()));
+ NewGV->setAlignment(B.GV->getAlign());
// Copy the original vtable's metadata to the anonymous global, adjusting
// offsets as required.
@@ -2014,6 +2072,44 @@ void DevirtModule::removeRedundantTypeTests() {
}
}
+ValueInfo
+DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
+ ModuleSummaryIndex *ExportSummary) {
+ assert((ExportSummary != nullptr) &&
+ "Caller guarantees ExportSummary is not nullptr");
+
+ const auto TheFnGUID = TheFn->getGUID();
+ const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
+ // Look up ValueInfo with the GUID in the current linkage.
+ ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
+ // If no entry is found and GUID is different from GUID computed using
+ // exported name, look up ValueInfo with the exported name unconditionally.
+ // This is a fallback.
+ //
+ // The reason to have a fallback:
+ // 1. LTO could enable global value internalization via
+ // `enable-lto-internalization`.
+ // 2. The GUID in ExportedSummary is computed using exported name.
+ if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
+ TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
+ }
+ return TheFnVI;
+}
+
+bool DevirtModule::mustBeUnreachableFunction(
+ Function *const F, ModuleSummaryIndex *ExportSummary) {
+ // First, learn unreachability by analyzing function IR.
+ if (!F->isDeclaration()) {
+ // A function must be unreachable if its entry block ends with an
+ // 'unreachable'.
+ return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
+ }
+ // Learn unreachability from ExportSummary if ExportSummary is present.
+ return ExportSummary &&
+ ::mustBeUnreachableFunction(
+ DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
+}
+
bool DevirtModule::run() {
// If only some of the modules were split, we cannot correctly perform
// this transformation. We already checked for the presense of type tests
@@ -2137,7 +2233,7 @@ bool DevirtModule::run() {
cast<MDString>(S.first.TypeID)->getString())
.WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
- S.first.ByteOffset)) {
+ S.first.ByteOffset, ExportSummary)) {
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
DidVirtualConstProp |=
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index dc55b5a31596..de1034c910d5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1795,6 +1795,55 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
}
}
+ // (~A & B & C) | ... --> ...
+ // (~A | B | C) | ... --> ...
+  // TODO: One use checks are conservative. We just need to check that the
+  // total number of multiply-used values does not exceed the reduction in
+  // operations.
+ if (match(Op0,
+ m_OneUse(m_c_BinOp(FlippedOpcode,
+ m_BinOp(FlippedOpcode, m_Value(B), m_Value(C)),
+ m_CombineAnd(m_Value(X), m_Not(m_Value(A)))))) ||
+ match(Op0, m_OneUse(m_c_BinOp(
+ FlippedOpcode,
+ m_c_BinOp(FlippedOpcode, m_Value(C),
+ m_CombineAnd(m_Value(X), m_Not(m_Value(A)))),
+ m_Value(B))))) {
+ // X = ~A
+ // (~A & B & C) | ~(A | B | C) --> ~(A | (B ^ C))
+ // (~A | B | C) & ~(A & B & C) --> (~A | (B ^ C))
+ if (match(Op1, m_OneUse(m_Not(m_c_BinOp(
+ Opcode, m_c_BinOp(Opcode, m_Specific(A), m_Specific(B)),
+ m_Specific(C))))) ||
+ match(Op1, m_OneUse(m_Not(m_c_BinOp(
+ Opcode, m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)),
+ m_Specific(A))))) ||
+ match(Op1, m_OneUse(m_Not(m_c_BinOp(
+ Opcode, m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)),
+ m_Specific(B)))))) {
+ Value *Xor = Builder.CreateXor(B, C);
+ return (Opcode == Instruction::Or)
+ ? BinaryOperator::CreateNot(Builder.CreateOr(Xor, A))
+ : BinaryOperator::CreateOr(Xor, X);
+ }
+
+ // (~A & B & C) | ~(A | B) --> (C | ~B) & ~A
+ // (~A | B | C) & ~(A & B) --> (C & ~B) | ~A
+ if (match(Op1, m_OneUse(m_Not(m_OneUse(
+ m_c_BinOp(Opcode, m_Specific(A), m_Specific(B)))))))
+ return BinaryOperator::Create(
+ FlippedOpcode, Builder.CreateBinOp(Opcode, C, Builder.CreateNot(B)),
+ X);
+
+ // (~A & B & C) | ~(A | C) --> (B | ~C) & ~A
+ // (~A | B | C) & ~(A & C) --> (B & ~C) | ~A
+ if (match(Op1, m_OneUse(m_Not(m_OneUse(
+ m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)))))))
+ return BinaryOperator::Create(
+ FlippedOpcode, Builder.CreateBinOp(Opcode, B, Builder.CreateNot(C)),
+ X);
+ }
+
return nullptr;
}
@@ -2102,6 +2151,15 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
return SelectInst::Create(Cmp, Y, Zero);
}
+ // If there's a 'not' of the shifted value, swap the select operands:
+ // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y
+ if (match(&I, m_c_And(m_OneUse(m_Not(
+ m_AShr(m_Value(X), m_SpecificInt(FullShift)))),
+ m_Value(Y)))) {
+ Constant *Zero = ConstantInt::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
+ return SelectInst::Create(Cmp, Zero, Y);
+ }
// (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
if (sinkNotIntoOtherHandOfAndOrOr(I))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 7da2669e1d13..14427bd1f2f4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2472,6 +2472,12 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call,
Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
+ // Skip optimizing notail and musttail calls so
+ // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
+  // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
+ if (CI->isMustTailCall() || CI->isNoTailCall())
+ return nullptr;
+
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
replaceInstUsesWith(*From, With);
};
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 33f217659c01..8df4a4529f47 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -157,7 +157,7 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
Amt = Builder.CreateAdd(Amt, Off);
}
- AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt);
+ AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
New->setAlignment(AI.getAlign());
New->takeName(&AI);
New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
@@ -965,13 +965,13 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
if (match(Src, m_VScale(DL))) {
if (Trunc.getFunction() &&
Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
- unsigned MaxVScale = Trunc.getFunction()
- ->getFnAttribute(Attribute::VScaleRange)
- .getVScaleRangeArgs()
- .second;
- if (MaxVScale > 0 && Log2_32(MaxVScale) < DestWidth) {
- Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
- return replaceInstUsesWith(Trunc, VScale);
+ Attribute Attr =
+ Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
+ if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
+ if (Log2_32(MaxVScale.getValue()) < DestWidth) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(Trunc, VScale);
+ }
}
}
}
@@ -1337,14 +1337,13 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
if (match(Src, m_VScale(DL))) {
if (CI.getFunction() &&
CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
- unsigned MaxVScale = CI.getFunction()
- ->getFnAttribute(Attribute::VScaleRange)
- .getVScaleRangeArgs()
- .second;
- unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
- if (MaxVScale > 0 && Log2_32(MaxVScale) < TypeWidth) {
- Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
- return replaceInstUsesWith(CI, VScale);
+ Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
+ if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
+ unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
+ if (Log2_32(MaxVScale.getValue()) < TypeWidth) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(CI, VScale);
+ }
}
}
}
@@ -1608,13 +1607,12 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
if (match(Src, m_VScale(DL))) {
if (CI.getFunction() &&
CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
- unsigned MaxVScale = CI.getFunction()
- ->getFnAttribute(Attribute::VScaleRange)
- .getVScaleRangeArgs()
- .second;
- if (MaxVScale > 0 && Log2_32(MaxVScale) < (SrcBitSize - 1)) {
- Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
- return replaceInstUsesWith(CI, VScale);
+ Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
+ if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
+ if (Log2_32(MaxVScale.getValue()) < (SrcBitSize - 1)) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(CI, VScale);
+ }
}
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 20c75188ec9f..39b55b028110 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -600,6 +600,7 @@ public:
/// Canonicalize the position of binops relative to shufflevector.
Instruction *foldVectorBinop(BinaryOperator &Inst);
Instruction *foldVectorSelect(SelectInst &Sel);
+ Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 79a8a065d02a..0dbfdba353c4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -163,7 +163,7 @@ static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
if (!AllocaSize)
return false;
- return isDereferenceableAndAlignedPointer(V, Align(AI->getAlignment()),
+ return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
APInt(64, AllocaSize), DL);
}
@@ -183,7 +183,8 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
if (C->getValue().getActiveBits() <= 64) {
Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
+ AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
+ nullptr, AI.getName());
New->setAlignment(AI.getAlign());
// Scan to the end of the allocation instructions, to skip over a block of
@@ -199,21 +200,13 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
Value *NullIdx = Constant::getNullValue(IdxTy);
Value *Idx[2] = {NullIdx, NullIdx};
- Instruction *NewI = GetElementPtrInst::CreateInBounds(
+ Instruction *GEP = GetElementPtrInst::CreateInBounds(
NewTy, New, Idx, New->getName() + ".sub");
- IC.InsertNewInstBefore(NewI, *It);
-
- // Gracefully handle allocas in other address spaces.
- if (AI.getType()->getPointerAddressSpace() !=
- NewI->getType()->getPointerAddressSpace()) {
- NewI =
- CastInst::CreatePointerBitCastOrAddrSpaceCast(NewI, AI.getType());
- IC.InsertNewInstBefore(NewI, *It);
- }
+ IC.InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
// allocation.
- return IC.replaceInstUsesWith(AI, NewI);
+ return IC.replaceInstUsesWith(AI, GEP);
}
}
@@ -640,7 +633,6 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
return nullptr;
StringRef Name = LI.getName();
- assert(LI.getAlignment() && "Alignment must be set at this point");
if (auto *ST = dyn_cast<StructType>(T)) {
// If the struct only have one element, we unpack.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 779d298da7a4..aca7ec8d7325 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -755,6 +755,15 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
if (simplifyDivRemOfSelectWithZeroOp(I))
return &I;
+ // If the divisor is a select-of-constants, try to constant fold all div ops:
+ // C / (select Cond, TrueC, FalseC) --> select Cond, (C / TrueC), (C / FalseC)
+ // TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
+ if (match(Op0, m_ImmConstant()) &&
+ match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
+ if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1)))
+ return R;
+ }
+
const APInt *C2;
if (match(Op1, m_APInt(C2))) {
Value *X;
@@ -1461,6 +1470,15 @@ Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
if (simplifyDivRemOfSelectWithZeroOp(I))
return &I;
+ // If the divisor is a select-of-constants, try to constant fold all rem ops:
+ // C % (select Cond, TrueC, FalseC) --> select Cond, (C % TrueC), (C % FalseC)
+ // TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
+ if (match(Op0, m_ImmConstant()) &&
+ match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
+ if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1)))
+ return R;
+ }
+
if (isa<Constant>(Op1)) {
if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 35739c3b9a21..30f6aab2114b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -664,10 +664,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
return nullptr;
// When processing loads, we need to propagate two bits of information to the
- // sunk load: whether it is volatile, and what its alignment is. We currently
- // don't sink loads when some have their alignment specified and some don't.
- // visitLoadInst will propagate an alignment onto the load when TD is around,
- // and if TD isn't around, we can't handle the mixed case.
+ // sunk load: whether it is volatile, and what its alignment is.
bool isVolatile = FirstLI->isVolatile();
Align LoadAlignment = FirstLI->getAlign();
unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
@@ -699,7 +696,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
!isSafeAndProfitableToSinkLoad(LI))
return nullptr;
- LoadAlignment = std::min(LoadAlignment, Align(LI->getAlign()));
+ LoadAlignment = std::min(LoadAlignment, LI->getAlign());
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 518d3952dce5..a6d6b5199105 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1482,7 +1482,12 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
if (C0->getType() != Sel.getType())
return nullptr;
- // FIXME: are there any magic icmp predicate+constant pairs we must not touch?
+ // ULT with 'add' of a constant is canonical. See foldICmpAddConstant().
+ // FIXME: Are there more magic icmp predicate+constant pairs we must avoid?
+ // Or should we just abandon this transform entirely?
+ if (Pred == CmpInst::ICMP_ULT && match(X, m_Add(m_Value(), m_Constant())))
+ return nullptr;
+
Value *SelVal0, *SelVal1; // We do not care which one is from where.
match(&Sel, m_Select(m_Value(), m_Value(SelVal0), m_Value(SelVal1)));
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index e357a9da8b12..4dc712f32536 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1595,12 +1595,6 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
- // Any change to an instruction with potential poison must clear those flags
- // because we can not guarantee those constraints now. Other analysis may
- // determine that it is safe to re-apply the flags.
- if (MadeChange)
- BO->dropPoisonGeneratingFlags();
-
// Output elements are undefined if both are undefined. Consider things
// like undef & 0. The result is known zero, not undef.
UndefElts &= UndefElts2;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 32e537897140..c6a4602e59e3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -363,6 +363,18 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
return UnionUsedElts;
}
+/// Given a constant index for an extractelement or insertelement instruction,
+/// return it with the canonical type if it isn't already canonical. We
+/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
+/// matter, we just want a consistent type to simplify CSE.
+ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
+ const unsigned IndexBW = IndexC->getType()->getBitWidth();
+ if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
+ return nullptr;
+ return ConstantInt::get(IndexC->getContext(),
+ IndexC->getValue().zextOrTrunc(64));
+}
+
Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
Value *SrcVec = EI.getVectorOperand();
Value *Index = EI.getIndexOperand();
@@ -374,6 +386,10 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// find a previously computed scalar that was inserted into the vector.
auto *IndexC = dyn_cast<ConstantInt>(Index);
if (IndexC) {
+ // Canonicalize type of constant indices to i64 to simplify CSE
+ if (auto *NewIdx = getPreferredVectorIndex(IndexC))
+ return replaceOperand(EI, 1, NewIdx);
+
ElementCount EC = EI.getVectorOperandType()->getElementCount();
unsigned NumElts = EC.getKnownMinValue();
@@ -401,37 +417,6 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
return nullptr;
- // This instruction only demands the single element from the input vector.
- // Skip for scalable type, the number of elements is unknown at
- // compile-time.
- if (!EC.isScalable() && NumElts != 1) {
- // If the input vector has a single use, simplify it based on this use
- // property.
- if (SrcVec->hasOneUse()) {
- APInt UndefElts(NumElts, 0);
- APInt DemandedElts(NumElts, 0);
- DemandedElts.setBit(IndexC->getZExtValue());
- if (Value *V =
- SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
- return replaceOperand(EI, 0, V);
- } else {
- // If the input vector has multiple uses, simplify it based on a union
- // of all elements used.
- APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
- if (!DemandedElts.isAllOnes()) {
- APInt UndefElts(NumElts, 0);
- if (Value *V = SimplifyDemandedVectorElts(
- SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
- true /* AllowMultipleUsers */)) {
- if (V != SrcVec) {
- SrcVec->replaceAllUsesWith(V);
- return &EI;
- }
- }
- }
- }
- }
-
if (Instruction *I = foldBitcastExtElt(EI))
return I;
@@ -473,11 +458,9 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
if (auto *I = dyn_cast<Instruction>(SrcVec)) {
if (auto *IE = dyn_cast<InsertElementInst>(I)) {
- // Extracting the inserted element?
- if (IE->getOperand(2) == Index)
- return replaceInstUsesWith(EI, IE->getOperand(1));
- // If the inserted and extracted elements are constants, they must not
- // be the same value, extract from the pre-inserted value instead.
+      // instsimplify already handled the case where the indices are constants
+      // and equal by value. If both are constants, they must not be the same
+      // value; extract from the pre-inserted value instead.
if (isa<Constant>(IE->getOperand(2)) && IndexC)
return replaceOperand(EI, 0, IE->getOperand(0));
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
@@ -497,30 +480,27 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
llvm::count_if(GEP->operands(), [](const Value *V) {
return isa<VectorType>(V->getType());
});
- if (VectorOps > 1)
- return nullptr;
- assert(VectorOps == 1 && "Expected exactly one vector GEP operand!");
+ if (VectorOps == 1) {
+ Value *NewPtr = GEP->getPointerOperand();
+ if (isa<VectorType>(NewPtr->getType()))
+ NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
- Value *NewPtr = GEP->getPointerOperand();
- if (isa<VectorType>(NewPtr->getType()))
- NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
+ SmallVector<Value *> NewOps;
+ for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
+ Value *Op = GEP->getOperand(I);
+ if (isa<VectorType>(Op->getType()))
+ NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
+ else
+ NewOps.push_back(Op);
+ }
- SmallVector<Value *> NewOps;
- for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
- Value *Op = GEP->getOperand(I);
- if (isa<VectorType>(Op->getType()))
- NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
- else
- NewOps.push_back(Op);
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+ cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr,
+ NewOps);
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ return NewGEP;
}
-
- GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
- cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr,
- NewOps);
- NewGEP->setIsInBounds(GEP->isInBounds());
- return NewGEP;
}
- return nullptr;
} else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
@@ -554,6 +534,44 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
}
}
+
+ // Run demanded elements after other transforms as this can drop flags on
+ // binops. If there's two paths to the same final result, we prefer the
+ // one which doesn't force us to drop flags.
+ if (IndexC) {
+ ElementCount EC = EI.getVectorOperandType()->getElementCount();
+ unsigned NumElts = EC.getKnownMinValue();
+ // This instruction only demands the single element from the input vector.
+ // Skip for scalable type, the number of elements is unknown at
+ // compile-time.
+ if (!EC.isScalable() && NumElts != 1) {
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (SrcVec->hasOneUse()) {
+ APInt UndefElts(NumElts, 0);
+ APInt DemandedElts(NumElts, 0);
+ DemandedElts.setBit(IndexC->getZExtValue());
+ if (Value *V =
+ SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
+ return replaceOperand(EI, 0, V);
+ } else {
+ // If the input vector has multiple uses, simplify it based on a union
+ // of all elements used.
+ APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
+ if (!DemandedElts.isAllOnes()) {
+ APInt UndefElts(NumElts, 0);
+ if (Value *V = SimplifyDemandedVectorElts(
+ SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
+ true /* AllowMultipleUsers */)) {
+ if (V != SrcVec) {
+ SrcVec->replaceAllUsesWith(V);
+ return &EI;
+ }
+ }
+ }
+ }
+ }
+ }
return nullptr;
}
@@ -1476,6 +1494,11 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
return replaceInstUsesWith(IE, V);
+ // Canonicalize type of constant indices to i64 to simplify CSE
+ if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp))
+ if (auto *NewIdx = getPreferredVectorIndex(IndexC))
+ return replaceOperand(IE, 2, NewIdx);
+
// If the scalar is bitcast and inserted into undef, do the insert in the
// source type followed by bitcast.
// TODO: Generalize for insert into any constant, not just undef?
@@ -2008,9 +2031,7 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
}
/// Try to fold shuffles that are the equivalent of a vector select.
-static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
- InstCombiner::BuilderTy &Builder,
- const DataLayout &DL) {
+Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
if (!Shuf.isSelect())
return nullptr;
@@ -2118,21 +2139,23 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
V = Builder.CreateShuffleVector(X, Y, Mask);
}
- Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
- BinaryOperator::Create(BOpc, NewC, V);
+ Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
+ Builder.CreateBinOp(BOpc, NewC, V);
// Flags are intersected from the 2 source binops. But there are 2 exceptions:
// 1. If we changed an opcode, poison conditions might have changed.
// 2. If the shuffle had undef mask elements, the new binop might have undefs
// where the original code did not. But if we already made a safe constant,
// then there's no danger.
- NewBO->copyIRFlags(B0);
- NewBO->andIRFlags(B1);
- if (DropNSW)
- NewBO->setHasNoSignedWrap(false);
- if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
- NewBO->dropPoisonGeneratingFlags();
- return NewBO;
+ if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
+ NewI->copyIRFlags(B0);
+ NewI->andIRFlags(B1);
+ if (DropNSW)
+ NewI->setHasNoSignedWrap(false);
+ if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
+ NewI->dropPoisonGeneratingFlags();
+ }
+ return replaceInstUsesWith(Shuf, NewBO);
}
/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
@@ -2497,7 +2520,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
return I;
- if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
+ if (Instruction *I = foldSelectShuffle(SVI))
return I;
if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1f81624f79e7..eb5eadba194d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2546,7 +2546,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return nullptr;
}
-static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
+static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
Instruction *AI) {
if (isa<ConstantPointerNull>(V))
return true;
@@ -2557,12 +2557,34 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
// through bitcasts of V can cause
// the result statement below to be true, even when AI and V (ex:
// i8* ->i32* ->i8* of AI) are the same allocations.
- return isAllocLikeFn(V, TLI) && V != AI;
+ return isAllocLikeFn(V, &TLI) && V != AI;
+}
+
+/// Given a call CB which uses an address UsedV, return true if we can prove the
+/// call's only possible effect is storing to V.
+static bool isRemovableWrite(CallBase &CB, Value *UsedV,
+ const TargetLibraryInfo &TLI) {
+ if (!CB.use_empty())
+ // TODO: add recursion if returned attribute is present
+ return false;
+
+ if (CB.isTerminator())
+ // TODO: remove implementation restriction
+ return false;
+
+ if (!CB.willReturn() || !CB.doesNotThrow())
+ return false;
+
+ // If the only possible side effect of the call is writing to the alloca,
+ // and the result isn't used, we can safely remove any reads implied by the
+ // call including those which might read the alloca itself.
+ Optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
+ return Dest && Dest->Ptr == UsedV;
}
static bool isAllocSiteRemovable(Instruction *AI,
SmallVectorImpl<WeakTrackingVH> &Users,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo &TLI) {
SmallVector<Instruction*, 4> Worklist;
Worklist.push_back(AI);
@@ -2627,12 +2649,17 @@ static bool isAllocSiteRemovable(Instruction *AI,
}
}
- if (isFreeCall(I, TLI)) {
+ if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
+ Users.emplace_back(I);
+ continue;
+ }
+
+ if (isFreeCall(I, &TLI)) {
Users.emplace_back(I);
continue;
}
- if (isReallocLikeFn(I, TLI, true)) {
+ if (isReallocLikeFn(I, &TLI, true)) {
Users.emplace_back(I);
Worklist.push_back(I);
continue;
@@ -2676,7 +2703,7 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
}
- if (isAllocSiteRemovable(&MI, Users, &TLI)) {
+ if (isAllocSiteRemovable(&MI, Users, TLI)) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
// Lowering all @llvm.objectsize calls first because they may
// use a bitcast/GEP of the alloca we are removing.
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 38c219ce3465..9f26b37bbc79 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -232,6 +232,12 @@ static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
cl::desc("Track origins of labels"),
cl::Hidden, cl::init(0));
+static cl::opt<bool> ClIgnorePersonalityRoutine(
+ "dfsan-ignore-personality-routine",
+ cl::desc("If a personality routine is marked uninstrumented from the ABI "
+ "list, do not create a wrapper for it."),
+ cl::Hidden, cl::init(false));
+
static StringRef getGlobalTypeString(const GlobalValue &G) {
// Types of GlobalVariables are always pointer types.
Type *GType = G.getValueType();
@@ -1115,7 +1121,7 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
if (F->isVarArg()) {
- NewF->removeFnAttrs(AttrBuilder().addAttribute("split-stack"));
+ NewF->removeFnAttr("split-stack");
CallInst::Create(DFSanVarargWrapperFn,
IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
BB);
@@ -1357,9 +1363,24 @@ bool DataFlowSanitizer::runImpl(Module &M) {
std::vector<Function *> FnsToInstrument;
SmallPtrSet<Function *, 2> FnsWithNativeABI;
SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
+ SmallPtrSet<Constant *, 1> PersonalityFns;
for (Function &F : M)
- if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F))
+ if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F)) {
FnsToInstrument.push_back(&F);
+ if (F.hasPersonalityFn())
+ PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
+ }
+
+ if (ClIgnorePersonalityRoutine) {
+ for (auto *C : PersonalityFns) {
+ assert(isa<Function>(C) && "Personality routine is not a function!");
+ Function *F = cast<Function>(C);
+ if (!isInstrumented(F))
+ FnsToInstrument.erase(
+ std::remove(FnsToInstrument.begin(), FnsToInstrument.end(), F),
+ FnsToInstrument.end());
+ }
+ }
// Give function aliases prefixes when necessary, and build wrappers where the
// instrumentedness is inconsistent.
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index d1d3b8ffdf7a..de34348606ef 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -26,7 +26,9 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -40,6 +42,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
@@ -57,6 +60,13 @@ using namespace llvm;
#define DEBUG_TYPE "instrprof"
+namespace llvm {
+cl::opt<bool>
+ DebugInfoCorrelate("debug-info-correlate", cl::ZeroOrMore,
+ cl::desc("Use debug info to correlate profiles."),
+ cl::init(false));
+} // namespace llvm
+
namespace {
cl::opt<bool> DoHashBasedCounterSplit(
@@ -641,6 +651,12 @@ void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
}
void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
+ // TODO: Value profiling heavily depends on the data section which is omitted
+ // in lightweight mode. We need to move the value profile pointer to the
+ // Counter struct to get this working.
+ assert(
+ !DebugInfoCorrelate &&
+ "Value profiling is not yet supported with lightweight instrumentation");
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
assert(It != ProfileDataMap.end() && It->second.DataVar &&
@@ -855,6 +871,12 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+ // Use internal rather than private linkage so the counter variable shows up
+ // in the symbol table when using debug info for correlation.
+ if (DebugInfoCorrelate && TT.isOSBinFormatMachO() &&
+ Linkage == GlobalValue::PrivateLinkage)
+ Linkage = GlobalValue::InternalLinkage;
+
// Due to the limitation of binder as of 2021/09/28, the duplicate weak
// symbols in the same csect won't be discarded. When there are duplicate weak
// symbols, we can NOT guarantee that the relocations get resolved to the
@@ -916,6 +938,42 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
MaybeSetComdat(CounterPtr);
CounterPtr->setLinkage(Linkage);
PD.RegionCounters = CounterPtr;
+ if (DebugInfoCorrelate) {
+ if (auto *SP = Fn->getSubprogram()) {
+ DIBuilder DB(*M, true, SP->getUnit());
+ Metadata *FunctionNameAnnotation[] = {
+ MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName),
+ MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)),
+ };
+ Metadata *CFGHashAnnotation[] = {
+ MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName),
+ ConstantAsMetadata::get(Inc->getHash()),
+ };
+ Metadata *NumCountersAnnotation[] = {
+ MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName),
+ ConstantAsMetadata::get(Inc->getNumCounters()),
+ };
+ auto Annotations = DB.getOrCreateArray({
+ MDNode::get(Ctx, FunctionNameAnnotation),
+ MDNode::get(Ctx, CFGHashAnnotation),
+ MDNode::get(Ctx, NumCountersAnnotation),
+ });
+ auto *DICounter = DB.createGlobalVariableExpression(
+ SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
+ /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
+ CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
+ /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
+ Annotations);
+ CounterPtr->addDebugInfo(DICounter);
+ DB.finalize();
+ } else {
+ std::string Msg = ("Missing debug info for function " + Fn->getName() +
+ "; required for profile correlation.")
+ .str();
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+ }
+ }
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
// Allocate statically the array of pointers to value profile nodes for
@@ -939,6 +997,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
}
+ if (DebugInfoCorrelate)
+ return PD.RegionCounters;
+
// Create data variable.
auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext());
auto *Int16Ty = Type::getInt16Ty(Ctx);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4d15b784f486..446e601cd4d7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -307,6 +307,11 @@ static cl::opt<bool>
cl::desc("Enable KernelMemorySanitizer instrumentation"),
cl::Hidden, cl::init(false));
+static cl::opt<bool>
+ ClDisableChecks("msan-disable-checks",
+ cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
+ cl::init(false));
+
// This is an experiment to enable handling of cases where shadow is a non-zero
// compile-time constant. For some unexplainable reason they were silently
// ignored in the instrumentation.
@@ -1095,7 +1100,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
const TargetLibraryInfo &TLI)
: F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
- bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
+ bool SanitizeFunction =
+ F.hasFnAttribute(Attribute::SanitizeMemory) && !ClDisableChecks;
InsertChecks = SanitizeFunction;
PropagateShadow = SanitizeFunction;
PoisonStack = SanitizeFunction && ClPoisonStack;
@@ -1214,7 +1220,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
Value *ShadowPtr, *OriginPtr;
Type *ShadowTy = Shadow->getType();
- const Align Alignment = assumeAligned(SI->getAlignment());
+ const Align Alignment = SI->getAlign();
const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
@@ -3887,8 +3893,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
&I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
- IRB.CreateMemSet(ShadowBase, PoisonValue, Len,
- MaybeAlign(I.getAlignment()));
+ IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign());
}
if (PoisonStack && MS.TrackOrigins) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index af5946325bbb..b6ba1fc2132c 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -273,14 +273,14 @@ static cl::opt<bool> PGOVerifyBFI(
"internal option -pass-remakrs-analysis=pgo."));
static cl::opt<unsigned> PGOVerifyBFIRatio(
- "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden,
- cl::desc("Set the threshold for pgo-verify-big -- only print out "
+ "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
+ cl::desc("Set the threshold for pgo-verify-bfi: only print out "
"mismatched BFI if the difference percentage is greater than "
"this value (in percentage)."));
static cl::opt<unsigned> PGOVerifyBFICutoff(
- "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden,
- cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
+ "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
+ cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
"profile count value is below."));
namespace llvm {
@@ -291,6 +291,8 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
// Command line option to specify the name of the function for CFG dump
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+extern cl::opt<bool> DebugInfoCorrelate;
} // namespace llvm
static cl::opt<bool>
@@ -467,8 +469,9 @@ private:
createProfileFileNameVar(M, InstrProfileOutput);
// The variable in a comdat may be discarded by LTO. Ensure the
// declaration will be retained.
- appendToCompilerUsed(
- M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, PGOInstrumentEntry));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
+ PGOInstrumentEntry,
+ DebugInfoCorrelate));
return false;
}
std::string InstrProfileOutput;
@@ -1616,7 +1619,8 @@ static bool InstrumentAllFunctions(
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry);
+ createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry,
+ DebugInfoCorrelate);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1638,8 +1642,9 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
// The variable in a comdat may be discarded by LTO. Ensure the declaration
// will be retained.
- appendToCompilerUsed(
- M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, PGOInstrumentEntry));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
+ PGOInstrumentEntry,
+ DebugInfoCorrelate));
return PreservedAnalyses::all();
}
@@ -1774,7 +1779,7 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
uint64_t Diff = (BFICountValue >= CountValue)
? BFICountValue - CountValue
: CountValue - BFICountValue;
- if (Diff < CountValue / 100 * PGOVerifyBFIRatio)
+ if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
continue;
}
BBMisMatchNum++;
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 27f54f8026e1..37a7053d778e 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -271,8 +271,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
// subtree of BB (subtree not including the BB itself).
DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
InsertPtsMap.reserve(Orders.size() + 1);
- for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
- BasicBlock *Node = *RIt;
+ for (BasicBlock *Node : llvm::reverse(Orders)) {
bool NodeInBBs = BBs.count(Node);
auto &InsertPts = InsertPtsMap[Node].first;
BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 8c4523206070..dda1a2f08076 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -588,7 +588,7 @@ struct AllSwitchPaths {
PrevBB = BB;
}
- if (TPath.isExitValueSet())
+ if (TPath.isExitValueSet() && isSupported(TPath))
TPaths.push_back(TPath);
}
}
@@ -683,6 +683,62 @@ private:
return Res;
}
+ /// The determinator BB should precede the switch-defining BB.
+ ///
+ /// Otherwise, it is possible that the state defined in the determinator block
+ /// defines the state for the next iteration of the loop, rather than for the
+ /// current one.
+ ///
+ /// Currently supported paths:
+ /// \code
+ /// < switch bb1 determ def > [ 42, determ ]
+ /// < switch_and_def bb1 determ > [ 42, determ ]
+ /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ]
+ /// \endcode
+ ///
+ /// Unsupported paths:
+ /// \code
+ /// < switch bb1 def determ > [ 43, determ ]
+ /// < switch_and_determ bb1 def > [ 43, switch_and_determ ]
+ /// \endcode
+ bool isSupported(const ThreadingPath &TPath) {
+ Instruction *SwitchCondI = dyn_cast<Instruction>(Switch->getCondition());
+ assert(SwitchCondI);
+ if (!SwitchCondI)
+ return false;
+
+ const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent();
+ const BasicBlock *SwitchCondUseBB = Switch->getParent();
+ const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB();
+
+ assert(
+ SwitchCondUseBB == TPath.getPath().front() &&
+ "The first BB in a threading path should have the switch instruction");
+ if (SwitchCondUseBB != TPath.getPath().front())
+ return false;
+
+ // Make DeterminatorBB the first element in Path.
+ PathType Path = TPath.getPath();
+ auto ItDet = std::find(Path.begin(), Path.end(), DeterminatorBB);
+ std::rotate(Path.begin(), ItDet, Path.end());
+
+ bool IsDetBBSeen = false;
+ bool IsDefBBSeen = false;
+ bool IsUseBBSeen = false;
+ for (BasicBlock *BB : Path) {
+ if (BB == DeterminatorBB)
+ IsDetBBSeen = true;
+ if (BB == SwitchCondDefBB)
+ IsDefBBSeen = true;
+ if (BB == SwitchCondUseBB)
+ IsUseBBSeen = true;
+ if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen)
+ return false;
+ }
+
+ return true;
+ }
+
SwitchInst *Switch;
BasicBlock *SwitchBlock;
OptimizationRemarkEmitter *ORE;
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e0d3a6accadd..eadbb4293539 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -175,44 +175,6 @@ static cl::opt<bool>
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
-/// If the value of this instruction and the memory it writes to is unused, may
-/// we delete this instruction?
-static bool isRemovable(Instruction *I) {
- // Don't remove volatile/atomic stores.
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isUnordered();
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("Does not have LocForWrite");
- case Intrinsic::lifetime_end:
- // Never remove dead lifetime_end's, e.g. because it is followed by a
- // free.
- return false;
- case Intrinsic::init_trampoline:
- // Always safe to remove init_trampoline.
- return true;
- case Intrinsic::memset:
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- case Intrinsic::memcpy_inline:
- // Don't remove volatile memory intrinsics.
- return !cast<MemIntrinsic>(II)->isVolatile();
- case Intrinsic::memcpy_element_unordered_atomic:
- case Intrinsic::memmove_element_unordered_atomic:
- case Intrinsic::memset_element_unordered_atomic:
- case Intrinsic::masked_store:
- return true;
- }
- }
-
- // note: only get here for calls with analyzable writes - i.e. libcalls
- if (auto *CB = dyn_cast<CallBase>(I))
- return CB->use_empty();
-
- return false;
-}
-
/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
@@ -835,7 +797,7 @@ struct DSEState {
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (getLocForWriteEx(&I) || isMemTerminatorInst(&I)))
+ (getLocForWrite(&I) || isMemTerminatorInst(&I)))
MemDefs.push_back(MD);
}
}
@@ -1022,48 +984,39 @@ struct DSEState {
return I.first->second;
}
- Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
+ Optional<MemoryLocation> getLocForWrite(Instruction *I) const {
if (!I->mayWriteToMemory())
return None;
- if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
- return {MemoryLocation::getForDest(MTI)};
+ if (auto *CB = dyn_cast<CallBase>(I))
+ return MemoryLocation::getForDest(CB, TLI);
+
+ return MemoryLocation::getOrNone(I);
+ }
+
+ /// Assuming this instruction has a dead analyzable write, can we delete
+ /// this instruction?
+ bool isRemovable(Instruction *I) {
+ assert(getLocForWrite(I) && "Must have analyzable write");
+
+ // Don't remove volatile/atomic stores.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isUnordered();
if (auto *CB = dyn_cast<CallBase>(I)) {
- // If the functions may write to memory we do not know about, bail out.
- if (!CB->onlyAccessesArgMemory() &&
- !CB->onlyAccessesInaccessibleMemOrArgMem())
- return None;
+ // Don't remove volatile memory intrinsics.
+ if (auto *MI = dyn_cast<MemIntrinsic>(CB))
+ return !MI->isVolatile();
- LibFunc LF;
- if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
- switch (LF) {
- case LibFunc_strncpy:
- if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
- return MemoryLocation(CB->getArgOperand(0),
- LocationSize::precise(Len->getZExtValue()),
- CB->getAAMetadata());
- LLVM_FALLTHROUGH;
- case LibFunc_strcpy:
- case LibFunc_strcat:
- case LibFunc_strncat:
- return {MemoryLocation::getAfter(CB->getArgOperand(0))};
- default:
- break;
- }
- }
- switch (CB->getIntrinsicID()) {
- case Intrinsic::init_trampoline:
- return {MemoryLocation::getAfter(CB->getArgOperand(0))};
- case Intrinsic::masked_store:
- return {MemoryLocation::getForArgument(CB, 1, TLI)};
- default:
- break;
- }
- return None;
+ // Never remove dead lifetime intrinsics, e.g. because they are followed
+ // by a free.
+ if (CB->isLifetimeStartOrEnd())
+ return false;
+
+ return CB->use_empty() && CB->willReturn() && CB->doesNotThrow();
}
- return MemoryLocation::getOrNone(I);
+ return false;
}
/// Returns true if \p UseInst completely overwrites \p DefLoc
@@ -1081,7 +1034,7 @@ struct DSEState {
return false;
int64_t InstWriteOffset, DepWriteOffset;
- if (auto CC = getLocForWriteEx(UseInst))
+ if (auto CC = getLocForWrite(UseInst))
return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
DepWriteOffset) == OW_Complete;
return false;
@@ -1093,7 +1046,7 @@ struct DSEState {
<< *Def->getMemoryInst()
<< ") is at the end the function \n");
- auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
+ auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
if (!MaybeLoc) {
LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
return false;
@@ -1237,30 +1190,14 @@ struct DSEState {
/// loop. In particular, this guarantees that it only references a single
/// MemoryLocation during execution of the containing function.
bool isGuaranteedLoopInvariant(const Value *Ptr) {
- auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
- Ptr = Ptr->stripPointerCasts();
- if (auto *I = dyn_cast<Instruction>(Ptr)) {
- if (isa<AllocaInst>(Ptr))
- return true;
-
- if (isAllocLikeFn(I, &TLI))
- return true;
-
- return false;
- }
- return true;
- };
-
Ptr = Ptr->stripPointerCasts();
- if (auto *I = dyn_cast<Instruction>(Ptr)) {
- if (I->getParent()->isEntryBlock())
- return true;
- }
- if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
- return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
- GEP->hasAllConstantIndices();
- }
- return IsGuaranteedLoopInvariantBase(Ptr);
+ if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
+ if (GEP->hasAllConstantIndices())
+ Ptr = GEP->getPointerOperand()->stripPointerCasts();
+
+ if (auto *I = dyn_cast<Instruction>(Ptr))
+ return I->getParent()->isEntryBlock();
+ return true;
}
// Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
@@ -1372,7 +1309,7 @@ struct DSEState {
// If Current does not have an analyzable write location or is not
// removable, skip it.
- CurrentLoc = getLocForWriteEx(CurrentI);
+ CurrentLoc = getLocForWrite(CurrentI);
if (!CurrentLoc || !isRemovable(CurrentI)) {
CanOptimize = false;
continue;
@@ -1729,14 +1666,13 @@ struct DSEState {
LLVM_DEBUG(
dbgs()
<< "Trying to eliminate MemoryDefs at the end of the function\n");
- for (int I = MemDefs.size() - 1; I >= 0; I--) {
- MemoryDef *Def = MemDefs[I];
- if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst()))
+ for (MemoryDef *Def : llvm::reverse(MemDefs)) {
+ if (SkipStores.contains(Def))
continue;
Instruction *DefI = Def->getMemoryInst();
- auto DefLoc = getLocForWriteEx(DefI);
- if (!DefLoc)
+ auto DefLoc = getLocForWrite(DefI);
+ if (!DefLoc || !isRemovable(DefI))
continue;
// NOTE: Currently eliminating writes at the end of a function is limited
@@ -1763,13 +1699,19 @@ struct DSEState {
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
- StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
- MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst());
+ Instruction *DefI = Def->getMemoryInst();
+ StoreInst *Store = dyn_cast<StoreInst>(DefI);
+ MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
Constant *StoredConstant = nullptr;
if (Store)
StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
- if (MemSet)
+ else if (MemSet)
StoredConstant = dyn_cast<Constant>(MemSet->getValue());
+ else
+ return false;
+
+ if (!isRemovable(DefI))
+ return false;
if (StoredConstant && StoredConstant->isNullValue()) {
auto *DefUOInst = dyn_cast<Instruction>(DefUO);
@@ -1902,7 +1844,7 @@ struct DSEState {
bool Changed = false;
for (auto OI : IOL) {
Instruction *DeadI = OI.first;
- MemoryLocation Loc = *getLocForWriteEx(DeadI);
+ MemoryLocation Loc = *getLocForWrite(DeadI);
assert(isRemovable(DeadI) && "Expect only removable instruction");
const Value *Ptr = Loc.Ptr->stripPointerCasts();
@@ -1925,9 +1867,14 @@ struct DSEState {
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
"already existing value\n");
for (auto *Def : MemDefs) {
- if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
- !isRemovable(Def->getMemoryInst()))
+ if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
continue;
+
+ Instruction *DefInst = Def->getMemoryInst();
+ auto MaybeDefLoc = getLocForWrite(DefInst);
+ if (!MaybeDefLoc || !isRemovable(DefInst))
+ continue;
+
MemoryDef *UpperDef;
// To conserve compile-time, we avoid walking to the next clobbering def.
// Instead, we just try to get the optimized access, if it exists. DSE
@@ -1939,17 +1886,14 @@ struct DSEState {
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
continue;
- Instruction *DefInst = Def->getMemoryInst();
Instruction *UpperInst = UpperDef->getMemoryInst();
- auto IsRedundantStore = [this, DefInst,
- UpperInst](MemoryLocation UpperLoc) {
+ auto IsRedundantStore = [&]() {
if (DefInst->isIdenticalTo(UpperInst))
return true;
if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
- auto MaybeDefLoc = getLocForWriteEx(DefInst);
- if (!MaybeDefLoc)
- return false;
+ // MemSetInst must have a write location.
+ MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
int64_t InstWriteOffset = 0;
int64_t DepWriteOffset = 0;
auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
@@ -1962,9 +1906,7 @@ struct DSEState {
return false;
};
- auto MaybeUpperLoc = getLocForWriteEx(UpperInst);
- if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) ||
- isReadClobber(*MaybeUpperLoc, DefInst))
+ if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
continue;
LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
<< '\n');
@@ -1995,7 +1937,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
else
- MaybeKillingLoc = State.getLocForWriteEx(KillingI);
+ MaybeKillingLoc = State.getLocForWrite(KillingI);
if (!MaybeKillingLoc) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
@@ -2059,7 +2001,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
- MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI);
+ MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
if (IsMemTerm) {
const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
@@ -2124,8 +2066,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
// Check if the store is a no-op.
- if (!Shortend && isRemovable(KillingI) &&
- State.storeIsNoop(KillingDef, KillingUndObj)) {
+ if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
<< '\n');
State.deleteDeadInstruction(KillingI);
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 90f71f7729a7..a24997dd3fd4 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1366,8 +1366,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- if (auto *I = dyn_cast<Instruction>(V))
- I->andIRFlags(&Inst);
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // If I being poison triggers UB, there is no need to drop those
+ // flags. Otherwise, only retain flags present on both I and Inst.
+ // TODO: Currently some fast-math flags are not treated as
+ // poison-generating even though they should. Until this is fixed,
+ // always retain flags present on both I and Inst for floating point
+ // instructions.
+ if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
+ I->andIRFlags(&Inst);
+ }
Inst.replaceAllUsesWith(V);
salvageKnowledge(&Inst, &AC);
removeMSSA(Inst);
diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e54a270fb276..44017b555769 100644
--- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -13,10 +13,12 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -24,11 +26,11 @@ using namespace llvm;
#define DEBUG_TYPE "flattencfg"
namespace {
-struct FlattenCFGPass : public FunctionPass {
+struct FlattenCFGLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
public:
- FlattenCFGPass() : FunctionPass(ID) {
- initializeFlattenCFGPassPass(*PassRegistry::getPassRegistry());
+ FlattenCFGLegacyPass() : FunctionPass(ID) {
+ initializeFlattenCFGLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -39,21 +41,10 @@ public:
private:
AliasAnalysis *AA;
};
-}
-
-char FlattenCFGPass::ID = 0;
-INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
- false)
-
-// Public interface to the FlattenCFG pass
-FunctionPass *llvm::createFlattenCFGPass() { return new FlattenCFGPass(); }
/// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function,
/// iterating until no more changes are made.
-static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
+bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
bool Changed = false;
bool LocalChange = true;
@@ -78,8 +69,22 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
}
return Changed;
}
+} // namespace
-bool FlattenCFGPass::runOnFunction(Function &F) {
+char FlattenCFGLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(FlattenCFGLegacyPass, "flattencfg", "Flatten the CFG",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(FlattenCFGLegacyPass, "flattencfg", "Flatten the CFG",
+ false, false)
+
+// Public interface to the FlattenCFG pass
+FunctionPass *llvm::createFlattenCFGPass() {
+ return new FlattenCFGLegacyPass();
+}
+
+bool FlattenCFGLegacyPass::runOnFunction(Function &F) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverChanged = false;
// iterativelyFlattenCFG can make some blocks dead.
@@ -89,3 +94,15 @@ bool FlattenCFGPass::runOnFunction(Function &F) {
}
return EverChanged;
}
+
+PreservedAnalyses FlattenCFGPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool EverChanged = false;
+ AliasAnalysis *AA = &AM.getResult<AAManager>(F);
+ // iterativelyFlattenCFG can make some blocks dead.
+ while (iterativelyFlattenCFG(F, AA)) {
+ removeUnreachableBlocks(F);
+ EverChanged = true;
+ }
+ return EverChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 6f97f3e93123..bc792ca3d8da 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -107,11 +107,6 @@ static cl::opt<bool> ControlFlowHoisting(
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
-static cl::opt<unsigned> HoistSinkColdnessThreshold(
- "licm-coldness-threshold", cl::Hidden, cl::init(4),
- cl::desc("Relative coldness Threshold of hoisting/sinking destination "
- "block for LICM to be considered beneficial"));
-
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
@@ -819,35 +814,6 @@ public:
};
} // namespace
-// Hoisting/sinking instruction out of a loop isn't always beneficial. It's only
-// only worthwhile if the destination block is actually colder than current
-// block.
-static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
- OptimizationRemarkEmitter *ORE,
- BlockFrequencyInfo *BFI) {
- // Check block frequency only when runtime profile is available
- // to avoid pathological cases. With static profile, lean towards
- // hosting because it helps canonicalize the loop for vectorizer.
- if (!DstBlock->getParent()->hasProfileData())
- return true;
-
- if (!HoistSinkColdnessThreshold || !BFI)
- return true;
-
- BasicBlock *SrcBlock = I.getParent();
- if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
- BFI->getBlockFreq(SrcBlock).getFrequency()) {
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
- << "failed to sink or hoist instruction because containing block "
- "has lower frequency than destination block";
- });
- return false;
- }
-
- return true;
-}
-
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -909,7 +875,6 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
true, &Flags, ORE) &&
- worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
@@ -1741,7 +1706,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// First check if I is worth sinking for all uses. Sink only when it is worth
// across all uses.
SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
- SmallVector<PHINode *, 8> ExitPNs;
for (auto *UI : Users) {
auto *User = cast<Instruction>(UI);
@@ -1751,14 +1715,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
- if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
- return Changed;
- }
-
- ExitPNs.push_back(PN);
- }
-
- for (auto *PN : ExitPNs) {
// The PHI must be trivially replaceable.
Instruction *New = sinkThroughTriviallyReplaceablePHI(
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 77d76609c926..57e36e5b9b90 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -224,8 +224,8 @@ bool LoopDataPrefetch::run() {
bool MadeChange = false;
for (Loop *I : *LI)
- for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
- MadeChange |= runOnLoop(*L);
+ for (Loop *L : depth_first(I))
+ MadeChange |= runOnLoop(L);
return MadeChange;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 42da86a9ecf5..5d00fa56e888 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -786,9 +786,9 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
- MaybeAlign(HeadStore->getAlignment()),
- StoredVal, HeadStore, AdjacentStores, StoreEv,
- BECount, IsNegStride)) {
+ MaybeAlign(HeadStore->getAlign()), StoredVal,
+ HeadStore, AdjacentStores, StoreEv, BECount,
+ IsNegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
Changed = true;
}
@@ -967,12 +967,22 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
<< "\n");
if (PositiveStrideSCEV != MemsetSizeSCEV) {
- // TODO: folding can be done to the SCEVs
- // The folding is to fold expressions that is covered by the loop guard
- // at loop entry. After the folding, compare again and proceed
- // optimization if equal.
- LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n");
- return false;
+ // If an expression is covered by the loop guard, compare again and
+ // proceed with optimization if equal.
+ const SCEV *FoldedPositiveStride =
+ SE->applyLoopGuards(PositiveStrideSCEV, CurLoop);
+ const SCEV *FoldedMemsetSize =
+ SE->applyLoopGuards(MemsetSizeSCEV, CurLoop);
+
+ LLVM_DEBUG(dbgs() << " Try to fold SCEV based on loop guard\n"
+ << " FoldedMemsetSize: " << *FoldedMemsetSize << "\n"
+ << " FoldedPositiveStride: " << *FoldedPositiveStride
+ << "\n");
+
+ if (FoldedPositiveStride != FoldedMemsetSize) {
+ LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n");
+ return false;
+ }
}
}
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 56d66b93dd69..9d22eceb987f 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1456,16 +1456,12 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) {
}
// Remove instructions associated with non-base iterations.
- for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
- J != JE;) {
- unsigned I = Uses[&*J].find_first();
+ for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*Header))) {
+ unsigned I = Uses[&Inst].find_first();
if (I > 0 && I < IL_All) {
- LLVM_DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
- J++->eraseFromParent();
- continue;
+ LLVM_DEBUG(dbgs() << "LRR: removing: " << Inst << "\n");
+ Inst.eraseFromParent();
}
-
- ++J;
}
// Rewrite each BaseInst using SCEV.
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a9a2266e1196..798af48c2337 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6011,7 +6011,7 @@ struct SCEVDbgValueBuilder {
// See setFinalExpression: prepend our opcodes on the start of any old
// expression opcodes.
assert(!DI.hasArgList());
- llvm::SmallVector<uint64_t, 6> FinalExpr(Expr.begin() + 2, Expr.end());
+ llvm::SmallVector<uint64_t, 6> FinalExpr(llvm::drop_begin(Expr, 2));
auto *NewExpr =
DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
DI.setExpression(NewExpr);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 39c8b65968aa..893928fb0560 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1136,6 +1136,31 @@ static LoopUnrollResult tryToUnrollLoop(
TransformationMode TM = hasUnrollTransformation(L);
if (TM & TM_Disable)
return LoopUnrollResult::Unmodified;
+
+ // If this loop isn't forced to be unrolled, avoid unrolling it when the
+ // parent loop has an explicit unroll-and-jam pragma. This is to prevent
+ // automatic unrolling from interfering with the user requested
+ // transformation.
+ Loop *ParentL = L->getParentLoop();
+ if (ParentL != NULL &&
+ hasUnrollAndJamTransformation(ParentL) == TM_ForcedByUser &&
+ hasUnrollTransformation(L) != TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
+ << " llvm.loop.unroll_and_jam.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ // If this loop isn't forced to be unrolled, avoid unrolling it when the
+ // loop has an explicit unroll-and-jam pragma. This is to prevent automatic
+ // unrolling from interfering with the user requested transformation.
+ if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser &&
+ hasUnrollTransformation(L) != TM_ForcedByUser) {
+ LLVM_DEBUG(
+ dbgs()
+ << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
if (!L->isLoopSimplifyForm()) {
LLVM_DEBUG(
dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 91215cd19e2b..10a8742940b1 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -638,6 +638,7 @@ class NewGVN {
BitVector TouchedInstructions;
DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
+ mutable DenseMap<const IntrinsicInst *, const Value *> IntrinsicInstPred;
#ifndef NDEBUG
// Debugging for how many times each block and instruction got processed.
@@ -794,7 +795,7 @@ private:
BasicBlock *PHIBlock) const;
const Expression *performSymbolicAggrValueEvaluation(Instruction *) const;
ExprResult performSymbolicCmpEvaluation(Instruction *) const;
- ExprResult performSymbolicPredicateInfoEvaluation(Instruction *) const;
+ ExprResult performSymbolicPredicateInfoEvaluation(IntrinsicInst *) const;
// Congruence finding.
bool someEquivalentDominates(const Instruction *, const Instruction *) const;
@@ -815,6 +816,8 @@ private:
// Ranking
unsigned int getRank(const Value *) const;
bool shouldSwapOperands(const Value *, const Value *) const;
+ bool shouldSwapOperandsForIntrinsic(const Value *, const Value *,
+ const IntrinsicInst *I) const;
// Reachability handling.
void updateReachableEdge(BasicBlock *, BasicBlock *);
@@ -1552,7 +1555,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
}
NewGVN::ExprResult
-NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
+NewGVN::performSymbolicPredicateInfoEvaluation(IntrinsicInst *I) const {
auto *PI = PredInfo->getPredicateInfoFor(I);
if (!PI)
return ExprResult::none();
@@ -1572,7 +1575,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
Value *AdditionallyUsedValue = CmpOp0;
// Sort the ops.
- if (shouldSwapOperands(FirstOp, SecondOp)) {
+ if (shouldSwapOperandsForIntrinsic(FirstOp, SecondOp, I)) {
std::swap(FirstOp, SecondOp);
Predicate = CmpInst::getSwappedPredicate(Predicate);
AdditionallyUsedValue = CmpOp1;
@@ -1598,7 +1601,7 @@ NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
// Intrinsics with the returned attribute are copies of arguments.
if (auto *ReturnedValue = II->getReturnedArgOperand()) {
if (II->getIntrinsicID() == Intrinsic::ssa_copy)
- if (auto Res = performSymbolicPredicateInfoEvaluation(I))
+ if (auto Res = performSymbolicPredicateInfoEvaluation(II))
return Res;
return ExprResult::some(createVariableOrConstant(ReturnedValue));
}
@@ -2951,6 +2954,7 @@ void NewGVN::cleanupTables() {
PredicateToUsers.clear();
MemoryToUsers.clear();
RevisitOnReachabilityChange.clear();
+ IntrinsicInstPred.clear();
}
// Assign local DFS number mapping to instructions, and leave space for Value
@@ -4152,6 +4156,29 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const {
return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B);
}
+bool NewGVN::shouldSwapOperandsForIntrinsic(const Value *A, const Value *B,
+ const IntrinsicInst *I) const {
+ auto LookupResult = IntrinsicInstPred.find(I);
+ if (shouldSwapOperands(A, B)) {
+ if (LookupResult == IntrinsicInstPred.end())
+ IntrinsicInstPred.insert({I, B});
+ else
+ LookupResult->second = B;
+ return true;
+ }
+
+ if (LookupResult != IntrinsicInstPred.end()) {
+ auto *SeenPredicate = LookupResult->second;
+ if (SeenPredicate) {
+ if (SeenPredicate == B)
+ return true;
+ else
+ LookupResult->second = nullptr;
+ }
+ }
+ return false;
+}
+
namespace {
class NewGVNLegacyPass : public FunctionPass {
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 2d3490b2d29e..e12eca0ed287 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1359,16 +1359,6 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] =
Attribute::InaccessibleMemOrArgMemOnly,
Attribute::NoSync, Attribute::NoFree};
-// List of all parameter and return attributes which must be stripped when
-// lowering from the abstract machine model. Note that we list attributes
-// here which aren't valid as return attributes, that is okay. There are
-// also some additional attributes with arguments which are handled
-// explicitly and are not in this list.
-static constexpr Attribute::AttrKind ParamAttrsToStrip[] =
- {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly,
- Attribute::NoAlias, Attribute::NoFree};
-
-
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
@@ -2650,24 +2640,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
return !Records.empty();
}
-// Handles both return values and arguments for Functions and calls.
-template <typename AttrHolder>
-static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
- unsigned Index) {
+// List of all parameter and return attributes which must be stripped when
+// lowering from the abstract machine model. Note that we list attributes
+// here which aren't valid as return attributes, that is okay.
+static AttrBuilder getParamAndReturnAttributesToRemove() {
AttrBuilder R;
- AttributeSet AS = AH.getAttributes().getAttributes(Index);
- if (AS.getDereferenceableBytes())
- R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
- AS.getDereferenceableBytes()));
- if (AS.getDereferenceableOrNullBytes())
- R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
- AS.getDereferenceableOrNullBytes()));
- for (auto Attr : ParamAttrsToStrip)
- if (AS.hasAttribute(Attr))
- R.addAttribute(Attr);
-
- if (!R.empty())
- AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, R));
+ R.addDereferenceableAttr(1);
+ R.addDereferenceableOrNullAttr(1);
+ R.addAttribute(Attribute::ReadNone);
+ R.addAttribute(Attribute::ReadOnly);
+ R.addAttribute(Attribute::WriteOnly);
+ R.addAttribute(Attribute::NoAlias);
+ R.addAttribute(Attribute::NoFree);
+ return R;
}
static void stripNonValidAttributesFromPrototype(Function &F) {
@@ -2683,13 +2668,13 @@ static void stripNonValidAttributesFromPrototype(Function &F) {
return;
}
+ AttrBuilder R = getParamAndReturnAttributesToRemove();
for (Argument &A : F.args())
if (isa<PointerType>(A.getType()))
- RemoveNonValidAttrAtIndex(Ctx, F,
- A.getArgNo() + AttributeList::FirstArgIndex);
+ F.removeParamAttrs(A.getArgNo(), R);
if (isa<PointerType>(F.getReturnType()))
- RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);
+ F.removeRetAttrs(R);
for (auto Attr : FnAttrsToStrip)
F.removeFnAttr(Attr);
@@ -2757,13 +2742,13 @@ static void stripNonValidDataFromBody(Function &F) {
stripInvalidMetadataFromInstruction(I);
+ AttrBuilder R = getParamAndReturnAttributesToRemove();
if (auto *Call = dyn_cast<CallBase>(&I)) {
for (int i = 0, e = Call->arg_size(); i != e; i++)
if (isa<PointerType>(Call->getArgOperand(i)->getType()))
- RemoveNonValidAttrAtIndex(Ctx, *Call,
- i + AttributeList::FirstArgIndex);
+ Call->removeParamAttrs(i, R);
if (isa<PointerType>(Call->getType()))
- RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex);
+ Call->removeRetAttrs(R);
}
}
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index 28e00c873361..ff2f8a25f379 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -101,8 +101,7 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
Constant *Const = nullptr;
if (V->getType()->isStructTy()) {
std::vector<ValueLatticeElement> IVs = Solver.getStructLatticeValueFor(V);
- if (any_of(IVs,
- [](const ValueLatticeElement &LV) { return isOverdefined(LV); }))
+ if (llvm::any_of(IVs, isOverdefined))
return false;
std::vector<Constant *> ConstVals;
auto *ST = cast<StructType>(V->getType());
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index a041af0d70d0..f9650efc051f 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -54,7 +54,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeMakeGuardsExplicitLegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
initializeGVNSinkLegacyPassPass(Registry);
- initializeFlattenCFGPassPass(Registry);
+ initializeFlattenCFGLegacyPassPass(Registry);
initializeIRCELegacyPassPass(Registry);
initializeIndVarSimplifyLegacyPassPass(Registry);
initializeInferAddressSpacesPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index ffa2f9adb978..d23925042b0a 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -648,13 +648,13 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {
Value *Current = V;
// ExtInsts is built in the use-def order. Therefore, we apply them to V
// in the reversed order.
- for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
+ for (CastInst *I : llvm::reverse(ExtInsts)) {
if (Constant *C = dyn_cast<Constant>(Current)) {
// If Current is a constant, apply s/zext using ConstantExpr::getCast.
// ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
- Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
+ Current = ConstantExpr::getCast(I->getOpcode(), C, I->getType());
} else {
- Instruction *Ext = (*I)->clone();
+ Instruction *Ext = I->clone();
Ext->setOperand(0, Current);
Ext->insertBefore(IP);
Current = Ext;
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
new file mode 100644
index 000000000000..dfb9f608eab2
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -0,0 +1,942 @@
+//===- CodeLayout.cpp - Implementation of code layout algorithms ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ExtTSP - layout of basic blocks with i-cache optimization.
+//
+// The algorithm tries to find a layout of nodes (basic blocks) of a given CFG
+// optimizing jump locality and thus processor I-cache utilization. This is
+// achieved via increasing the number of fall-through jumps and co-locating
+// frequently executed nodes together. The name follows the underlying
+// optimization problem, Extended-TSP, which is a generalization of the
+// classical (maximum) Traveling Salesman Problem.
+//
+// The algorithm is a greedy heuristic that works with chains (ordered lists)
+// of basic blocks. Initially all chains are isolated basic blocks. On every
+// iteration, we pick a pair of chains whose merging yields the biggest increase
+// in the ExtTSP score, which models how i-cache "friendly" a specific chain is.
+// A pair of chains giving the maximum gain is merged into a new chain. The
+// procedure stops when there is only one chain left, or when merging does not
+// increase ExtTSP. In the latter case, the remaining chains are sorted by
+// density in the decreasing order.
+//
+// An important aspect is the way two chains are merged. Unlike earlier
+// algorithms (e.g., based on the approach of Pettis-Hansen), two
+// chains, X and Y, are first split into three, X1, X2, and Y. Then we
+// consider all possible ways of gluing the three chains (e.g., X1YX2, X1X2Y,
+// X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one producing the largest score.
+// This improves the quality of the final result (the search space is larger)
+// while keeping the implementation sufficiently fast.
+//
+// Reference:
+// * A. Newell and S. Pupyrev, Improved Basic Block Reordering,
+// IEEE Transactions on Computers, 2020
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "code-layout"
+
+// Algorithm-specific constants. The values are tuned for the best performance
+// of large-scale front-end bound binaries.
+static cl::opt<double>
+ ForwardWeight("ext-tsp-forward-weight", cl::Hidden, cl::init(0.1),
+ cl::desc("The weight of forward jumps for ExtTSP value"));
+
+static cl::opt<double>
+ BackwardWeight("ext-tsp-backward-weight", cl::Hidden, cl::init(0.1),
+ cl::desc("The weight of backward jumps for ExtTSP value"));
+
+static cl::opt<unsigned> ForwardDistance(
+ "ext-tsp-forward-distance", cl::Hidden, cl::init(1024),
+ cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));
+
+static cl::opt<unsigned> BackwardDistance(
+ "ext-tsp-backward-distance", cl::Hidden, cl::init(640),
+ cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
+
+// The maximum size of a chain for splitting. Larger values of the threshold
+// may yield better quality at the cost of worse run-time.
+static cl::opt<unsigned> ChainSplitThreshold(
+ "ext-tsp-chain-split-threshold", cl::Hidden, cl::init(128),
+ cl::desc("The maximum size of a chain to apply splitting"));
+
+// The option enables splitting (large) chains along in-coming and out-going
+// jumps. This typically results in a better quality.
+static cl::opt<bool> EnableChainSplitAlongJumps(
+ "ext-tsp-enable-chain-split-along-jumps", cl::Hidden, cl::init(true),
+    cl::desc("Enable splitting chains along in-coming and out-going jumps"));
+
+namespace {
+
+// Epsilon for comparison of doubles.
+constexpr double EPS = 1e-8;
+
+// Compute the Ext-TSP score for a jump between a given pair of blocks,
+// using their sizes, (estimated) addresses and the jump execution count.
+double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
+ uint64_t Count) {
+ // Fallthrough
+ if (SrcAddr + SrcSize == DstAddr) {
+ // Assume that FallthroughWeight = 1.0 after normalization
+ return static_cast<double>(Count);
+ }
+ // Forward
+ if (SrcAddr + SrcSize < DstAddr) {
+ const auto Dist = DstAddr - (SrcAddr + SrcSize);
+ if (Dist <= ForwardDistance) {
+ double Prob = 1.0 - static_cast<double>(Dist) / ForwardDistance;
+ return ForwardWeight * Prob * Count;
+ }
+ return 0;
+ }
+ // Backward
+ const auto Dist = SrcAddr + SrcSize - DstAddr;
+ if (Dist <= BackwardDistance) {
+ double Prob = 1.0 - static_cast<double>(Dist) / BackwardDistance;
+ return BackwardWeight * Prob * Count;
+ }
+ return 0;
+}
+
+/// A type of merging two chains, X and Y. The former chain is split into
+/// X1 and X2 and then concatenated with Y in the order specified by the type.
+enum class MergeTypeTy : int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y };
+
+/// The gain of merging two chains, that is, the Ext-TSP score of the merge
+/// together with the corresponding merge 'type' and 'offset'.
+class MergeGainTy {
+public:
+ explicit MergeGainTy() {}
+ explicit MergeGainTy(double Score, size_t MergeOffset, MergeTypeTy MergeType)
+ : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
+
+ double score() const { return Score; }
+
+ size_t mergeOffset() const { return MergeOffset; }
+
+ MergeTypeTy mergeType() const { return MergeType; }
+
+ // Returns 'true' iff Other is preferred over this.
+ bool operator<(const MergeGainTy &Other) const {
+ return (Other.Score > EPS && Other.Score > Score + EPS);
+ }
+
+ // Update the current gain if Other is preferred over this.
+ void updateIfLessThan(const MergeGainTy &Other) {
+ if (*this < Other)
+ *this = Other;
+ }
+
+private:
+ double Score{-1.0};
+ size_t MergeOffset{0};
+ MergeTypeTy MergeType{MergeTypeTy::X_Y};
+};
+
+class Block;
+class Jump;
+class Chain;
+class ChainEdge;
+
+/// A node in the graph, typically corresponding to a basic block in CFG.
+class Block {
+public:
+ Block(const Block &) = delete;
+ Block(Block &&) = default;
+ Block &operator=(const Block &) = delete;
+ Block &operator=(Block &&) = default;
+
+ // The original index of the block in CFG.
+ size_t Index{0};
+ // The index of the block in the current chain.
+ size_t CurIndex{0};
+ // Size of the block in the binary.
+ uint64_t Size{0};
+ // Execution count of the block in the profile data.
+ uint64_t ExecutionCount{0};
+ // Current chain of the node.
+ Chain *CurChain{nullptr};
+ // An offset of the block in the current chain.
+ mutable uint64_t EstimatedAddr{0};
+ // Forced successor of the block in CFG.
+ Block *ForcedSucc{nullptr};
+ // Forced predecessor of the block in CFG.
+ Block *ForcedPred{nullptr};
+ // Outgoing jumps from the block.
+ std::vector<Jump *> OutJumps;
+ // Incoming jumps to the block.
+ std::vector<Jump *> InJumps;
+
+public:
+ explicit Block(size_t Index, uint64_t Size_, uint64_t EC)
+ : Index(Index), Size(Size_), ExecutionCount(EC) {}
+ bool isEntry() const { return Index == 0; }
+};
+
+/// An arc in the graph, typically corresponding to a jump between two blocks.
+class Jump {
+public:
+ Jump(const Jump &) = delete;
+ Jump(Jump &&) = default;
+ Jump &operator=(const Jump &) = delete;
+ Jump &operator=(Jump &&) = default;
+
+ // Source block of the jump.
+ Block *Source;
+ // Target block of the jump.
+ Block *Target;
+ // Execution count of the arc in the profile data.
+ uint64_t ExecutionCount{0};
+
+public:
+ explicit Jump(Block *Source, Block *Target, uint64_t ExecutionCount)
+ : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
+};
+
+/// A chain (ordered sequence) of blocks.
+class Chain {
+public:
+ Chain(const Chain &) = delete;
+ Chain(Chain &&) = default;
+ Chain &operator=(const Chain &) = delete;
+ Chain &operator=(Chain &&) = default;
+
+ explicit Chain(uint64_t Id, Block *Block)
+ : Id(Id), Score(0), Blocks(1, Block) {}
+
+ uint64_t id() const { return Id; }
+
+ bool isEntry() const { return Blocks[0]->Index == 0; }
+
+ double score() const { return Score; }
+
+ void setScore(double NewScore) { Score = NewScore; }
+
+ const std::vector<Block *> &blocks() const { return Blocks; }
+
+ const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const {
+ return Edges;
+ }
+
+ ChainEdge *getEdge(Chain *Other) const {
+ for (auto It : Edges) {
+ if (It.first == Other)
+ return It.second;
+ }
+ return nullptr;
+ }
+
+ void removeEdge(Chain *Other) {
+ auto It = Edges.begin();
+ while (It != Edges.end()) {
+ if (It->first == Other) {
+ Edges.erase(It);
+ return;
+ }
+ It++;
+ }
+ }
+
+ void addEdge(Chain *Other, ChainEdge *Edge) {
+ Edges.push_back(std::make_pair(Other, Edge));
+ }
+
+ void merge(Chain *Other, const std::vector<Block *> &MergedBlocks) {
+ Blocks = MergedBlocks;
+ // Update the block's chains
+ for (size_t Idx = 0; Idx < Blocks.size(); Idx++) {
+ Blocks[Idx]->CurChain = this;
+ Blocks[Idx]->CurIndex = Idx;
+ }
+ }
+
+ void mergeEdges(Chain *Other);
+
+ void clear() {
+ Blocks.clear();
+ Blocks.shrink_to_fit();
+ Edges.clear();
+ Edges.shrink_to_fit();
+ }
+
+private:
+ // Unique chain identifier.
+ uint64_t Id;
+ // Cached ext-tsp score for the chain.
+ double Score;
+ // Blocks of the chain.
+ std::vector<Block *> Blocks;
+ // Adjacent chains and corresponding edges (lists of jumps).
+ std::vector<std::pair<Chain *, ChainEdge *>> Edges;
+};
+
+/// An edge in CFG representing jumps between two chains.
+/// When blocks are merged into chains, the edges are combined too so that
+/// there is always at most one edge between a pair of chains.
+class ChainEdge {
+public:
+ ChainEdge(const ChainEdge &) = delete;
+ ChainEdge(ChainEdge &&) = default;
+ ChainEdge &operator=(const ChainEdge &) = delete;
+ ChainEdge &operator=(ChainEdge &&) = default;
+
+ explicit ChainEdge(Jump *Jump)
+ : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),
+ Jumps(1, Jump) {}
+
+ const std::vector<Jump *> &jumps() const { return Jumps; }
+
+ void changeEndpoint(Chain *From, Chain *To) {
+ if (From == SrcChain)
+ SrcChain = To;
+ if (From == DstChain)
+ DstChain = To;
+ }
+
+ void appendJump(Jump *Jump) { Jumps.push_back(Jump); }
+
+ void moveJumps(ChainEdge *Other) {
+ Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end());
+ Other->Jumps.clear();
+ Other->Jumps.shrink_to_fit();
+ }
+
+ bool hasCachedMergeGain(Chain *Src, Chain *Dst) const {
+ return Src == SrcChain ? CacheValidForward : CacheValidBackward;
+ }
+
+ MergeGainTy getCachedMergeGain(Chain *Src, Chain *Dst) const {
+ return Src == SrcChain ? CachedGainForward : CachedGainBackward;
+ }
+
+ void setCachedMergeGain(Chain *Src, Chain *Dst, MergeGainTy MergeGain) {
+ if (Src == SrcChain) {
+ CachedGainForward = MergeGain;
+ CacheValidForward = true;
+ } else {
+ CachedGainBackward = MergeGain;
+ CacheValidBackward = true;
+ }
+ }
+
+ void invalidateCache() {
+ CacheValidForward = false;
+ CacheValidBackward = false;
+ }
+
+private:
+ // Source chain.
+ Chain *SrcChain{nullptr};
+ // Destination chain.
+ Chain *DstChain{nullptr};
+  // Original jumps in the binary with corresponding execution counts.
+ std::vector<Jump *> Jumps;
+ // Cached ext-tsp value for merging the pair of chains.
+ // Since the gain of merging (Src, Dst) and (Dst, Src) might be different,
+ // we store both values here.
+ MergeGainTy CachedGainForward;
+ MergeGainTy CachedGainBackward;
+ // Whether the cached value must be recomputed.
+ bool CacheValidForward{false};
+ bool CacheValidBackward{false};
+};
+
+void Chain::mergeEdges(Chain *Other) {
+ assert(this != Other && "cannot merge a chain with itself");
+
+ // Update edges adjacent to chain Other
+ for (auto EdgeIt : Other->Edges) {
+ const auto DstChain = EdgeIt.first;
+ const auto DstEdge = EdgeIt.second;
+ const auto TargetChain = DstChain == Other ? this : DstChain;
+ auto CurEdge = getEdge(TargetChain);
+ if (CurEdge == nullptr) {
+ DstEdge->changeEndpoint(Other, this);
+ this->addEdge(TargetChain, DstEdge);
+ if (DstChain != this && DstChain != Other) {
+ DstChain->addEdge(this, DstEdge);
+ }
+ } else {
+ CurEdge->moveJumps(DstEdge);
+ }
+ // Cleanup leftover edge
+ if (DstChain != Other) {
+ DstChain->removeEdge(Other);
+ }
+ }
+}
+
+using BlockIter = std::vector<Block *>::const_iterator;
+
+/// A wrapper around three chains of blocks; it is used to avoid extra
+/// instantiation of the vectors.
+class MergedChain {
+public:
+ MergedChain(BlockIter Begin1, BlockIter End1, BlockIter Begin2 = BlockIter(),
+ BlockIter End2 = BlockIter(), BlockIter Begin3 = BlockIter(),
+ BlockIter End3 = BlockIter())
+ : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
+ End3(End3) {}
+
+ template <typename F> void forEach(const F &Func) const {
+ for (auto It = Begin1; It != End1; It++)
+ Func(*It);
+ for (auto It = Begin2; It != End2; It++)
+ Func(*It);
+ for (auto It = Begin3; It != End3; It++)
+ Func(*It);
+ }
+
+ std::vector<Block *> getBlocks() const {
+ std::vector<Block *> Result;
+ Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
+ std::distance(Begin3, End3));
+ Result.insert(Result.end(), Begin1, End1);
+ Result.insert(Result.end(), Begin2, End2);
+ Result.insert(Result.end(), Begin3, End3);
+ return Result;
+ }
+
+ const Block *getFirstBlock() const { return *Begin1; }
+
+private:
+ BlockIter Begin1;
+ BlockIter End1;
+ BlockIter Begin2;
+ BlockIter End2;
+ BlockIter Begin3;
+ BlockIter End3;
+};
+
+/// The implementation of the ExtTSP algorithm.
+class ExtTSPImpl {
+ using EdgeT = std::pair<uint64_t, uint64_t>;
+ using EdgeCountMap = DenseMap<EdgeT, uint64_t>;
+
+public:
+ ExtTSPImpl(size_t NumNodes, const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const EdgeCountMap &EdgeCounts)
+ : NumNodes(NumNodes) {
+ initialize(NodeSizes, NodeCounts, EdgeCounts);
+ }
+
+ /// Run the algorithm and return an optimized ordering of blocks.
+ void run(std::vector<uint64_t> &Result) {
+ // Pass 1: Merge blocks with their mutually forced successors
+ mergeForcedPairs();
+
+ // Pass 2: Merge pairs of chains while improving the ExtTSP objective
+ mergeChainPairs();
+
+ // Pass 3: Merge cold blocks to reduce code size
+ mergeColdChains();
+
+ // Collect blocks from all chains
+ concatChains(Result);
+ }
+
+private:
+ /// Initialize the algorithm's data structures.
+ void initialize(const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const EdgeCountMap &EdgeCounts) {
+ // Initialize blocks
+ AllBlocks.reserve(NumNodes);
+ for (uint64_t Node = 0; Node < NumNodes; Node++) {
+ uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
+ uint64_t ExecutionCount = NodeCounts[Node];
+ // The execution count of the entry block is set to at least 1
+ if (Node == 0 && ExecutionCount == 0)
+ ExecutionCount = 1;
+ AllBlocks.emplace_back(Node, Size, ExecutionCount);
+ }
+
+ // Initialize jumps between blocks
+ SuccNodes = std::vector<std::vector<uint64_t>>(NumNodes);
+ PredNodes = std::vector<std::vector<uint64_t>>(NumNodes);
+ AllJumps.reserve(EdgeCounts.size());
+ for (auto It : EdgeCounts) {
+ auto Pred = It.first.first;
+ auto Succ = It.first.second;
+ // Ignore self-edges
+ if (Pred == Succ)
+ continue;
+
+ SuccNodes[Pred].push_back(Succ);
+ PredNodes[Succ].push_back(Pred);
+ auto ExecutionCount = It.second;
+ if (ExecutionCount > 0) {
+ auto &Block = AllBlocks[Pred];
+ auto &SuccBlock = AllBlocks[Succ];
+ AllJumps.emplace_back(&Block, &SuccBlock, ExecutionCount);
+ SuccBlock.InJumps.push_back(&AllJumps.back());
+ Block.OutJumps.push_back(&AllJumps.back());
+ }
+ }
+
+ // Initialize chains
+ AllChains.reserve(NumNodes);
+ HotChains.reserve(NumNodes);
+ for (auto &Block : AllBlocks) {
+ AllChains.emplace_back(Block.Index, &Block);
+ Block.CurChain = &AllChains.back();
+ if (Block.ExecutionCount > 0) {
+ HotChains.push_back(&AllChains.back());
+ }
+ }
+
+ // Initialize chain edges
+ AllEdges.reserve(AllJumps.size());
+ for (auto &Block : AllBlocks) {
+ for (auto &Jump : Block.OutJumps) {
+ const auto SuccBlock = Jump->Target;
+ auto CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain);
+ // this edge is already present in the graph
+ if (CurEdge != nullptr) {
+ assert(SuccBlock->CurChain->getEdge(Block.CurChain) != nullptr);
+ CurEdge->appendJump(Jump);
+ continue;
+ }
+ // this is a new edge
+ AllEdges.emplace_back(Jump);
+ Block.CurChain->addEdge(SuccBlock->CurChain, &AllEdges.back());
+ SuccBlock->CurChain->addEdge(Block.CurChain, &AllEdges.back());
+ }
+ }
+ }
+
+ /// For a pair of blocks, A and B, block B is the forced successor of A,
+ /// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps
+ /// to B are from A. Such blocks should be adjacent in the optimal ordering;
+ /// the method finds and merges such pairs of blocks.
+ void mergeForcedPairs() {
+ // Find fallthroughs based on edge weights
+ for (auto &Block : AllBlocks) {
+ if (SuccNodes[Block.Index].size() == 1 &&
+ PredNodes[SuccNodes[Block.Index][0]].size() == 1 &&
+ SuccNodes[Block.Index][0] != 0) {
+ size_t SuccIndex = SuccNodes[Block.Index][0];
+ Block.ForcedSucc = &AllBlocks[SuccIndex];
+ AllBlocks[SuccIndex].ForcedPred = &Block;
+ }
+ }
+
+ // There might be 'cycles' in the forced dependencies, since profile
+ // data isn't 100% accurate. Typically this is observed in loops, when the
+ // loop edges are the hottest successors for the basic blocks of the loop.
+ // Break the cycles by choosing the block with the smallest index as the
+ // head. This helps to keep the original order of the loops, which likely
+ // have already been rotated in the optimized manner.
+ for (auto &Block : AllBlocks) {
+ if (Block.ForcedSucc == nullptr || Block.ForcedPred == nullptr)
+ continue;
+
+ auto SuccBlock = Block.ForcedSucc;
+ while (SuccBlock != nullptr && SuccBlock != &Block) {
+ SuccBlock = SuccBlock->ForcedSucc;
+ }
+ if (SuccBlock == nullptr)
+ continue;
+ // Break the cycle
+ AllBlocks[Block.ForcedPred->Index].ForcedSucc = nullptr;
+ Block.ForcedPred = nullptr;
+ }
+
+ // Merge blocks with their fallthrough successors
+ for (auto &Block : AllBlocks) {
+ if (Block.ForcedPred == nullptr && Block.ForcedSucc != nullptr) {
+ auto CurBlock = &Block;
+ while (CurBlock->ForcedSucc != nullptr) {
+ const auto NextBlock = CurBlock->ForcedSucc;
+ mergeChains(Block.CurChain, NextBlock->CurChain, 0, MergeTypeTy::X_Y);
+ CurBlock = NextBlock;
+ }
+ }
+ }
+ }
+
+ /// Merge pairs of chains while improving the ExtTSP objective.
+ void mergeChainPairs() {
+ /// Deterministically compare pairs of chains
+ auto compareChainPairs = [](const Chain *A1, const Chain *B1,
+ const Chain *A2, const Chain *B2) {
+ if (A1 != A2)
+ return A1->id() < A2->id();
+ return B1->id() < B2->id();
+ };
+
+ while (HotChains.size() > 1) {
+ Chain *BestChainPred = nullptr;
+ Chain *BestChainSucc = nullptr;
+ auto BestGain = MergeGainTy();
+ // Iterate over all pairs of chains
+ for (auto ChainPred : HotChains) {
+ // Get candidates for merging with the current chain
+ for (auto EdgeIter : ChainPred->edges()) {
+ auto ChainSucc = EdgeIter.first;
+ auto ChainEdge = EdgeIter.second;
+ // Ignore loop edges
+ if (ChainPred == ChainSucc)
+ continue;
+
+ // Compute the gain of merging the two chains
+ auto CurGain = getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
+ if (CurGain.score() <= EPS)
+ continue;
+
+ if (BestGain < CurGain ||
+ (std::abs(CurGain.score() - BestGain.score()) < EPS &&
+ compareChainPairs(ChainPred, ChainSucc, BestChainPred,
+ BestChainSucc))) {
+ BestGain = CurGain;
+ BestChainPred = ChainPred;
+ BestChainSucc = ChainSucc;
+ }
+ }
+ }
+
+ // Stop merging when there is no improvement
+ if (BestGain.score() <= EPS)
+ break;
+
+ // Merge the best pair of chains
+ mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
+ BestGain.mergeType());
+ }
+ }
+
+ /// Merge cold blocks to reduce code size.
+ void mergeColdChains() {
+ for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
+ // Iterating over neighbors in the reverse order to make sure original
+ // fallthrough jumps are merged first
+ size_t NumSuccs = SuccNodes[SrcBB].size();
+ for (size_t Idx = 0; Idx < NumSuccs; Idx++) {
+ auto DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
+ auto SrcChain = AllBlocks[SrcBB].CurChain;
+ auto DstChain = AllBlocks[DstBB].CurChain;
+ if (SrcChain != DstChain && !DstChain->isEntry() &&
+ SrcChain->blocks().back()->Index == SrcBB &&
+ DstChain->blocks().front()->Index == DstBB) {
+ mergeChains(SrcChain, DstChain, 0, MergeTypeTy::X_Y);
+ }
+ }
+ }
+ }
+
+ /// Compute the Ext-TSP score for a given block order and a list of jumps.
+ double extTSPScore(const MergedChain &MergedBlocks,
+ const std::vector<Jump *> &Jumps) const {
+ if (Jumps.empty())
+ return 0.0;
+ uint64_t CurAddr = 0;
+ MergedBlocks.forEach([&](const Block *BB) {
+ BB->EstimatedAddr = CurAddr;
+ CurAddr += BB->Size;
+ });
+
+ double Score = 0;
+ for (auto &Jump : Jumps) {
+ const auto SrcBlock = Jump->Source;
+ const auto DstBlock = Jump->Target;
+ Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
+ DstBlock->EstimatedAddr, Jump->ExecutionCount);
+ }
+ return Score;
+ }
+
+ /// Compute the gain of merging two chains.
+ ///
+ /// The function considers all possible ways of merging two chains and
+ /// computes the one having the largest increase in ExtTSP objective. The
+ /// result is a pair with the first element being the gain and the second
+ /// element being the corresponding merging type.
+ MergeGainTy getBestMergeGain(Chain *ChainPred, Chain *ChainSucc,
+ ChainEdge *Edge) const {
+ if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
+ return Edge->getCachedMergeGain(ChainPred, ChainSucc);
+ }
+
+ // Precompute jumps between ChainPred and ChainSucc
+ auto Jumps = Edge->jumps();
+ auto EdgePP = ChainPred->getEdge(ChainPred);
+ if (EdgePP != nullptr) {
+ Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
+ }
+ assert(!Jumps.empty() && "trying to merge chains w/o jumps");
+
+ // The object holds the best currently chosen gain of merging the two chains
+ MergeGainTy Gain = MergeGainTy();
+
+ /// Given a merge offset and a list of merge types, try to merge two chains
+ /// and update Gain with a better alternative
+ auto tryChainMerging = [&](size_t Offset,
+ const std::vector<MergeTypeTy> &MergeTypes) {
+ // Skip merging corresponding to concatenation w/o splitting
+ if (Offset == 0 || Offset == ChainPred->blocks().size())
+ return;
+ // Skip merging if it breaks Forced successors
+ auto BB = ChainPred->blocks()[Offset - 1];
+ if (BB->ForcedSucc != nullptr)
+ return;
+ // Apply the merge, compute the corresponding gain, and update the best
+ // value, if the merge is beneficial
+ for (auto &MergeType : MergeTypes) {
+ Gain.updateIfLessThan(
+ computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
+ }
+ };
+
+ // Try to concatenate two chains w/o splitting
+ Gain.updateIfLessThan(
+ computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeTy::X_Y));
+
+ if (EnableChainSplitAlongJumps) {
+ // Attach (a part of) ChainPred before the first block of ChainSucc
+ for (auto &Jump : ChainSucc->blocks().front()->InJumps) {
+ const auto SrcBlock = Jump->Source;
+ if (SrcBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = SrcBlock->CurIndex + 1;
+ tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::X2_X1_Y});
+ }
+
+ // Attach (a part of) ChainPred after the last block of ChainSucc
+ for (auto &Jump : ChainSucc->blocks().back()->OutJumps) {
+ const auto DstBlock = Jump->Source;
+ if (DstBlock->CurChain != ChainPred)
+ continue;
+ size_t Offset = DstBlock->CurIndex;
+ tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1});
+ }
+ }
+
+ // Try to break ChainPred in various ways and concatenate with ChainSucc
+ if (ChainPred->blocks().size() <= ChainSplitThreshold) {
+ for (size_t Offset = 1; Offset < ChainPred->blocks().size(); Offset++) {
+ // Try to split the chain in different ways. In practice, applying
+ // X2_Y_X1 merging is almost never provides benefits; thus, we exclude
+ // it from consideration to reduce the search space
+ tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1,
+ MergeTypeTy::X2_X1_Y});
+ }
+ }
+ Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
+ return Gain;
+ }
+
+ /// Compute the score gain of merging two chains, respecting a given
+ /// merge 'type' and 'offset'.
+ ///
+ /// The two chains are not modified in the method.
+ MergeGainTy computeMergeGain(const Chain *ChainPred, const Chain *ChainSucc,
+ const std::vector<Jump *> &Jumps,
+ size_t MergeOffset,
+ MergeTypeTy MergeType) const {
+ auto MergedBlocks = mergeBlocks(ChainPred->blocks(), ChainSucc->blocks(),
+ MergeOffset, MergeType);
+
+ // Do not allow a merge that does not preserve the original entry block
+ if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
+ !MergedBlocks.getFirstBlock()->isEntry())
+ return MergeGainTy();
+
+ // The gain for the new chain
+ auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->score();
+ return MergeGainTy(NewGainScore, MergeOffset, MergeType);
+ }
+
+ /// Merge two chains of blocks respecting a given merge 'type' and 'offset'.
+ ///
+ /// If MergeType == 0, then the result is a concatentation of two chains.
+ /// Otherwise, the first chain is cut into two sub-chains at the offset,
+ /// and merged using all possible ways of concatenating three chains.
+ MergedChain mergeBlocks(const std::vector<Block *> &X,
+ const std::vector<Block *> &Y, size_t MergeOffset,
+ MergeTypeTy MergeType) const {
+ // Split the first chain, X, into X1 and X2
+ BlockIter BeginX1 = X.begin();
+ BlockIter EndX1 = X.begin() + MergeOffset;
+ BlockIter BeginX2 = X.begin() + MergeOffset;
+ BlockIter EndX2 = X.end();
+ BlockIter BeginY = Y.begin();
+ BlockIter EndY = Y.end();
+
+ // Construct a new chain from the three existing ones
+ switch (MergeType) {
+ case MergeTypeTy::X_Y:
+ return MergedChain(BeginX1, EndX2, BeginY, EndY);
+ case MergeTypeTy::X1_Y_X2:
+ return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
+ case MergeTypeTy::Y_X2_X1:
+ return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
+ case MergeTypeTy::X2_X1_Y:
+ return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
+ }
+ llvm_unreachable("unexpected chain merge type");
+ }
+
+ /// Merge chain From into chain Into, update the list of active chains,
+ /// adjacency information, and the corresponding cached values.
+ void mergeChains(Chain *Into, Chain *From, size_t MergeOffset,
+ MergeTypeTy MergeType) {
+ assert(Into != From && "a chain cannot be merged with itself");
+
+ // Merge the blocks
+ auto MergedBlocks =
+ mergeBlocks(Into->blocks(), From->blocks(), MergeOffset, MergeType);
+ Into->merge(From, MergedBlocks.getBlocks());
+ Into->mergeEdges(From);
+ From->clear();
+
+ // Update cached ext-tsp score for the new chain
+ auto SelfEdge = Into->getEdge(Into);
+ if (SelfEdge != nullptr) {
+ MergedBlocks = MergedChain(Into->blocks().begin(), Into->blocks().end());
+ Into->setScore(extTSPScore(MergedBlocks, SelfEdge->jumps()));
+ }
+
+ // Remove chain From from the list of active chains
+ auto Iter = std::remove(HotChains.begin(), HotChains.end(), From);
+ HotChains.erase(Iter, HotChains.end());
+
+ // Invalidate caches
+ for (auto EdgeIter : Into->edges()) {
+ EdgeIter.second->invalidateCache();
+ }
+ }
+
+ /// Concatenate all chains into a final order of blocks.
+ void concatChains(std::vector<uint64_t> &Order) {
+ // Collect chains and calculate some stats for their sorting
+ std::vector<Chain *> SortedChains;
+ DenseMap<const Chain *, double> ChainDensity;
+ for (auto &Chain : AllChains) {
+ if (!Chain.blocks().empty()) {
+ SortedChains.push_back(&Chain);
+ // Using doubles to avoid overflow of ExecutionCount
+ double Size = 0;
+ double ExecutionCount = 0;
+ for (auto Block : Chain.blocks()) {
+ Size += static_cast<double>(Block->Size);
+ ExecutionCount += static_cast<double>(Block->ExecutionCount);
+ }
+ assert(Size > 0 && "a chain of zero size");
+ ChainDensity[&Chain] = ExecutionCount / Size;
+ }
+ }
+
+ // Sorting chains by density in the decreasing order
+ std::stable_sort(SortedChains.begin(), SortedChains.end(),
+ [&](const Chain *C1, const Chain *C2) {
+ // Makre sure the original entry block is at the
+ // beginning of the order
+ if (C1->isEntry() != C2->isEntry()) {
+ return C1->isEntry();
+ }
+
+ const double D1 = ChainDensity[C1];
+ const double D2 = ChainDensity[C2];
+ // Compare by density and break ties by chain identifiers
+ return (D1 != D2) ? (D1 > D2) : (C1->id() < C2->id());
+ });
+
+ // Collect the blocks in the order specified by their chains
+ Order.reserve(NumNodes);
+ for (auto Chain : SortedChains) {
+ for (auto Block : Chain->blocks()) {
+ Order.push_back(Block->Index);
+ }
+ }
+ }
+
+private:
+ /// The number of nodes in the graph.
+ const size_t NumNodes;
+
+ /// Successors of each node.
+ std::vector<std::vector<uint64_t>> SuccNodes;
+
+ /// Predecessors of each node.
+ std::vector<std::vector<uint64_t>> PredNodes;
+
+ /// All basic blocks.
+ std::vector<Block> AllBlocks;
+
+ /// All jumps between blocks.
+ std::vector<Jump> AllJumps;
+
+ /// All chains of basic blocks.
+ std::vector<Chain> AllChains;
+
+ /// All edges between chains.
+ std::vector<ChainEdge> AllEdges;
+
+ /// Active chains. The vector gets updated at runtime when chains are merged.
+ std::vector<Chain *> HotChains;
+};
+
+} // end of anonymous namespace
+
+std::vector<uint64_t> llvm::applyExtTspLayout(
+ const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> &EdgeCounts) {
+ size_t NumNodes = NodeSizes.size();
+
+ // Verify correctness of the input data.
+ assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
+ assert(NumNodes > 2 && "Incorrect input");
+
+ // Apply the reordering algorithm.
+ auto Alg = ExtTSPImpl(NumNodes, NodeSizes, NodeCounts, EdgeCounts);
+ std::vector<uint64_t> Result;
+ Alg.run(Result);
+
+ // Verify correctness of the output.
+ assert(Result.front() == 0 && "Original entry point is not preserved");
+ assert(Result.size() == NumNodes && "Incorrect size of reordered layout");
+ return Result;
+}
+
+double llvm::calcExtTspScore(
+ const std::vector<uint64_t> &Order, const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> &EdgeCounts) {
+ // Estimate addresses of the blocks in memory
+ auto Addr = std::vector<uint64_t>(NodeSizes.size(), 0);
+ for (size_t Idx = 1; Idx < Order.size(); Idx++) {
+ Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
+ }
+
+ // Increase the score for each jump
+ double Score = 0;
+ for (auto It : EdgeCounts) {
+ auto Pred = It.first.first;
+ auto Succ = It.first.second;
+ uint64_t Count = It.second;
+ Score += extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count);
+ }
+ return Score;
+}
+
+double llvm::calcExtTspScore(
+ const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> &EdgeCounts) {
+ auto Order = std::vector<uint64_t>(NodeSizes.size());
+ for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
+ Order[Idx] = Idx;
+ }
+ return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
+}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index fc7083b0c30d..589622d69578 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -596,7 +596,7 @@ bool llvm::checkDebugInfoMetadata(Module &M,
auto DILocsBefore = DIPreservationMap[NameOfWrappedPass].DILocations;
auto DILocsAfter = DIPreservationAfter[NameOfWrappedPass].DILocations;
- auto InstToDelete = DIPreservationAfter[NameOfWrappedPass].InstToDelete;
+ auto InstToDelete = DIPreservationMap[NameOfWrappedPass].InstToDelete;
auto DIVarsBefore = DIPreservationMap[NameOfWrappedPass].DIVariables;
auto DIVarsAfter = DIPreservationAfter[NameOfWrappedPass].DIVariables;
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 326864803d7c..06596f7b04e1 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -58,6 +58,14 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
return 0;
}
+int FunctionComparator::cmpAligns(Align L, Align R) const {
+ if (L.value() < R.value())
+ return -1;
+ if (L.value() > R.value())
+ return 1;
+ return 0;
+}
+
int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
if ((int)L < (int)R)
return -1;
@@ -556,13 +564,12 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpTypes(AI->getAllocatedType(),
cast<AllocaInst>(R)->getAllocatedType()))
return Res;
- return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
+ return cmpAligns(AI->getAlign(), cast<AllocaInst>(R)->getAlign());
}
if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
return Res;
- if (int Res =
- cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
+ if (int Res = cmpAligns(LI->getAlign(), cast<LoadInst>(R)->getAlign()))
return Res;
if (int Res =
cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
@@ -578,8 +585,7 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res =
cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
return Res;
- if (int Res =
- cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
+ if (int Res = cmpAligns(SI->getAlign(), cast<StoreInst>(R)->getAlign()))
return Res;
if (int Res =
cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index ec926b1f5a94..ecad79b68185 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -402,6 +402,18 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
return wouldInstructionBeTriviallyDead(I, TLI);
}
+bool llvm::wouldInstructionBeTriviallyDeadOnUnusedPaths(
+ Instruction *I, const TargetLibraryInfo *TLI) {
+ // Instructions that are "markers" and have implied meaning on code around
+ // them (without explicit uses), are not dead on unused paths.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::stacksave ||
+ II->getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ II->isLifetimeStartOrEnd())
+ return false;
+ return wouldInstructionBeTriviallyDead(I, TLI);
+}
+
bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
if (I->isTerminator())
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index f3cf42be8ba1..69fd110dc3c2 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -104,9 +104,7 @@ bool llvm::canPeel(Loop *L) {
// note that LoopPeeling currently can only update the branch weights of latch
// blocks and branch weights to blocks with deopt or unreachable do not need
// updating.
- return all_of(Exits, [](const BasicBlock *BB) {
- return IsBlockFollowedByDeoptOrUnreachable(BB);
- });
+ return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable);
}
// This function calculates the number of iterations after which the given Phi
@@ -333,6 +331,31 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
return DesiredPeelCount;
}
+/// This "heuristic" exactly matches implicit behavior which used to exist
+/// inside getLoopEstimatedTripCount. It was added here to keep an
+/// improvement inside that API from causing peeling to become more agressive.
+/// This should probably be removed.
+static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return true;
+
+ BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
+ return true;
+
+ assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+ LatchBR->getSuccessor(1) == L->getHeader()) &&
+ "At least one edge out of the latch must go to the header");
+
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getUniqueNonLatchExitBlocks(ExitBlocks);
+ return any_of(ExitBlocks, [](const BasicBlock *EB) {
+ return !EB->getTerminatingDeoptimizeCall();
+ });
+}
+
+
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
@@ -436,6 +459,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// We only do this in the presence of profile information, since otherwise
// our estimates of the trip count are not reliable enough.
if (L->getHeader()->getParent()->hasProfileData()) {
+ if (violatesLegacyMultiExitLoopCheck(L))
+ return;
Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
if (!PeelCount)
return;
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c8e42acdffb3..93157bd87c34 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -773,8 +773,8 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
}
-/// Checks if \p L has single exit through latch block except possibly
-/// "deoptimizing" exits. Returns branch instruction terminating the loop
+/// Checks if \p L has an exiting latch branch. There may also be other
+/// exiting blocks. Returns branch instruction terminating the loop
/// latch if above check is successful, nullptr otherwise.
static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
@@ -789,53 +789,61 @@ static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {
LatchBR->getSuccessor(1) == L->getHeader()) &&
"At least one edge out of the latch must go to the header");
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L->getUniqueNonLatchExitBlocks(ExitBlocks);
- if (any_of(ExitBlocks, [](const BasicBlock *EB) {
- return !EB->getTerminatingDeoptimizeCall();
- }))
- return nullptr;
-
return LatchBR;
}
-Optional<unsigned>
-llvm::getLoopEstimatedTripCount(Loop *L,
- unsigned *EstimatedLoopInvocationWeight) {
- // Support loops with an exiting latch and other existing exists only
- // deoptimize.
- BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
- if (!LatchBranch)
- return None;
-
+/// Return the estimated trip count for any exiting branch which dominates
+/// the loop latch.
+static Optional<uint64_t>
+getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L,
+ uint64_t &OrigExitWeight) {
// To estimate the number of times the loop body was executed, we want to
// know the number of times the backedge was taken, vs. the number of times
// we exited the loop.
- uint64_t BackedgeTakenWeight, LatchExitWeight;
- if (!LatchBranch->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight))
+ uint64_t LoopWeight, ExitWeight;
+ if (!ExitingBranch->extractProfMetadata(LoopWeight, ExitWeight))
return None;
- if (LatchBranch->getSuccessor(0) != L->getHeader())
- std::swap(BackedgeTakenWeight, LatchExitWeight);
+ if (L->contains(ExitingBranch->getSuccessor(1)))
+ std::swap(LoopWeight, ExitWeight);
- if (!LatchExitWeight)
+ if (!ExitWeight)
+ // Don't have a way to return predicated infinite
return None;
- if (EstimatedLoopInvocationWeight)
- *EstimatedLoopInvocationWeight = LatchExitWeight;
+ OrigExitWeight = ExitWeight;
- // Estimated backedge taken count is a ratio of the backedge taken weight by
- // the weight of the edge exiting the loop, rounded to nearest.
- uint64_t BackedgeTakenCount =
- llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight);
- // Estimated trip count is one plus estimated backedge taken count.
- return BackedgeTakenCount + 1;
+ // Estimated exit count is a ratio of the loop weight by the weight of the
+ // edge exiting the loop, rounded to nearest.
+ uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
+ // Estimated trip count is one plus estimated exit count.
+ return ExitCount + 1;
+}
+
+Optional<unsigned>
+llvm::getLoopEstimatedTripCount(Loop *L,
+ unsigned *EstimatedLoopInvocationWeight) {
+ // Currently we take the estimate exit count only from the loop latch,
+ // ignoring other exiting blocks. This can overestimate the trip count
+ // if we exit through another exit, but can never underestimate it.
+ // TODO: incorporate information from other exits
+ if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
+ uint64_t ExitWeight;
+ if (Optional<uint64_t> EstTripCount =
+ getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
+ if (EstimatedLoopInvocationWeight)
+ *EstimatedLoopInvocationWeight = ExitWeight;
+ return *EstTripCount;
+ }
+ }
+ return None;
}
bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
unsigned EstimatedloopInvocationWeight) {
- // Support loops with an exiting latch and other existing exists only
- // deoptimize.
+ // At the moment, we only support changing the estimated trip count of
+ // the latch branch only. We could extend this API to manipulate estimated
+ // trip counts for any exit.
BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
if (!LatchBranch)
return false;
@@ -923,8 +931,7 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
// Helper to generate an ordered reduction.
Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
- unsigned Op, RecurKind RdxKind,
- ArrayRef<Value *> RedOps) {
+ unsigned Op, RecurKind RdxKind) {
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// Extract and apply reduction ops in ascending order:
@@ -942,9 +949,6 @@ Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
"Invalid min/max");
Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
}
-
- if (!RedOps.empty())
- propagateIRFlags(Result, RedOps);
}
return Result;
@@ -952,14 +956,20 @@ Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
// Helper to generate a log2 shuffle reduction.
Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
- unsigned Op, RecurKind RdxKind,
- ArrayRef<Value *> RedOps) {
+ unsigned Op, RecurKind RdxKind) {
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
// round.
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
+ // Note: fast-math-flags flags are controlled by the builder configuration
+ // and are assumed to apply to all generated arithmetic instructions. Other
+ // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
+ // of the builder configuration, and since they're not passed explicitly,
+ // will never be relevant here. Note that it would be generally unsound to
+ // propagate these from an intrinsic call to the expansion anyways as we
+ // change the order of operations.
Value *TmpVec = Src;
SmallVector<int, 32> ShuffleMask(VF);
for (unsigned i = VF; i != 1; i >>= 1) {
@@ -973,7 +983,6 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
- // The builder propagates its fast-math-flags setting.
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
} else {
@@ -981,13 +990,6 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
"Invalid min/max");
TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
}
- if (!RedOps.empty())
- propagateIRFlags(TmpVec, RedOps);
-
- // We may compute the reassociated scalar ops in a way that does not
- // preserve nsw/nuw etc. Conservatively, drop those flags.
- if (auto *ReductionInst = dyn_cast<Instruction>(TmpVec))
- ReductionInst->dropPoisonGeneratingFlags();
}
// The result is in the first element of the vector.
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
@@ -1035,8 +1037,7 @@ Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
const TargetTransformInfo *TTI,
- Value *Src, RecurKind RdxKind,
- ArrayRef<Value *> RedOps) {
+ Value *Src, RecurKind RdxKind) {
auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
switch (RdxKind) {
case RecurKind::Add:
diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 3ce10535d45f..9fba2f3f86b5 100644
--- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -31,10 +32,36 @@
#include "llvm/IR/TypeFinder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
+static cl::opt<std::string> RenameExcludeFunctionPrefixes(
+ "rename-exclude-function-prefixes",
+ cl::desc("Prefixes for functions that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
+static cl::opt<std::string> RenameExcludeAliasPrefixes(
+ "rename-exclude-alias-prefixes",
+ cl::desc("Prefixes for aliases that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
+static cl::opt<std::string> RenameExcludeGlobalPrefixes(
+ "rename-exclude-global-prefixes",
+ cl::desc(
+ "Prefixes for global values that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
+static cl::opt<std::string> RenameExcludeStructPrefixes(
+ "rename-exclude-struct-prefixes",
+ cl::desc("Prefixes for structs that don't need to be renamed, separated "
+ "by a comma"),
+ cl::Hidden);
+
static const char *const metaNames[] = {
// See http://en.wikipedia.org/wiki/Metasyntactic_variable
"foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
@@ -66,6 +93,18 @@ struct Renamer {
PRNG prng;
};
+static void
+parseExcludedPrefixes(StringRef PrefixesStr,
+ SmallVectorImpl<StringRef> &ExcludedPrefixes) {
+ for (;;) {
+ auto PrefixesSplit = PrefixesStr.split(',');
+ if (PrefixesSplit.first.empty())
+ break;
+ ExcludedPrefixes.push_back(PrefixesSplit.first);
+ PrefixesStr = PrefixesSplit.second;
+ }
+}
+
void MetaRename(Function &F) {
for (Argument &Arg : F.args())
if (!Arg.getType()->isVoidTy())
@@ -91,10 +130,26 @@ void MetaRename(Module &M,
Renamer renamer(randSeed);
+ SmallVector<StringRef, 8> ExcludedAliasesPrefixes;
+ SmallVector<StringRef, 8> ExcludedGlobalsPrefixes;
+ SmallVector<StringRef, 8> ExcludedStructsPrefixes;
+ SmallVector<StringRef, 8> ExcludedFuncPrefixes;
+ parseExcludedPrefixes(RenameExcludeAliasPrefixes, ExcludedAliasesPrefixes);
+ parseExcludedPrefixes(RenameExcludeGlobalPrefixes, ExcludedGlobalsPrefixes);
+ parseExcludedPrefixes(RenameExcludeStructPrefixes, ExcludedStructsPrefixes);
+ parseExcludedPrefixes(RenameExcludeFunctionPrefixes, ExcludedFuncPrefixes);
+
+ auto IsNameExcluded = [](StringRef &Name,
+ SmallVectorImpl<StringRef> &ExcludedPrefixes) {
+ return any_of(ExcludedPrefixes,
+ [&Name](auto &Prefix) { return Name.startswith(Prefix); });
+ };
+
// Rename all aliases
for (GlobalAlias &GA : M.aliases()) {
StringRef Name = GA.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ IsNameExcluded(Name, ExcludedAliasesPrefixes))
continue;
GA.setName("alias");
@@ -103,7 +158,8 @@ void MetaRename(Module &M,
// Rename all global variables
for (GlobalVariable &GV : M.globals()) {
StringRef Name = GV.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ IsNameExcluded(Name, ExcludedGlobalsPrefixes))
continue;
GV.setName("global");
@@ -113,7 +169,9 @@ void MetaRename(Module &M,
TypeFinder StructTypes;
StructTypes.run(M, true);
for (StructType *STy : StructTypes) {
- if (STy->isLiteral() || STy->getName().empty())
+ StringRef Name = STy->getName();
+ if (STy->isLiteral() || Name.empty() ||
+ IsNameExcluded(Name, ExcludedStructsPrefixes))
continue;
SmallString<128> NameStorage;
@@ -128,7 +186,8 @@ void MetaRename(Module &M,
// Leave library functions alone because their presence or absence could
// affect the behavior of other passes.
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
- GetTLI(F).getLibFunc(F, Tmp))
+ GetTLI(F).getLibFunc(F, Tmp) ||
+ IsNameExcluded(Name, ExcludedFuncPrefixes))
continue;
// Leave @main alone. The output of -metarenamer might be passed to
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index 3ebc89158173..65207056a3f4 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -144,6 +144,10 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
Value *Offset =
Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
+  // Insert the call to the load.relative intrinsic before LOAD.
+  // GEP might not be immediately followed by a LOAD; for example, it can be
+  // hoisted outside the loop or another instruction might be inserted in between.
+ Builder.SetInsertPoint(Load);
Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
&M, Intrinsic::load_relative, {Index->getType()});
Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy());
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 9495e442e0bf..2f2dff6b5f0b 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -220,7 +220,7 @@ private:
Now = Pred;
}
- assert(PathCapacity > 0 && "found incorrect augmenting path");
+ assert(PathCapacity > 0 && "found an incorrect augmenting path");
// Update the flow along the path
Now = Target;
@@ -271,6 +271,352 @@ private:
uint64_t Target;
};
+/// A post-processing adjustment of control flow. It applies two steps by
+/// rerouting some flow and making it more realistic:
+///
+/// - First, it removes all isolated components ("islands") with a positive flow
+/// that are unreachable from the entry block. For every such component, we
+///   find the shortest path from the entry to an exit through the component,
+/// and increase the flow by one unit along the path.
+///
+/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
+/// with no sampled counts. Then it rebalnces the flow that goes through such
+/// a subgraph so that each branch is taken with probability 50%.
+/// An unknown subgraph is such that for every two nodes u and v:
+/// - u dominates v and u is not unknown;
+/// - v post-dominates u; and
+/// - all inner-nodes of all (u,v)-paths are unknown.
+///
+class FlowAdjuster {
+public:
+ FlowAdjuster(FlowFunction &Func) : Func(Func) {
+ assert(Func.Blocks[Func.Entry].isEntry() &&
+ "incorrect index of the entry block");
+ }
+
+ // Run the post-processing
+ void run() {
+ /// Adjust the flow to get rid of isolated components.
+ joinIsolatedComponents();
+
+ /// Rebalance the flow inside unknown subgraphs.
+ rebalanceUnknownSubgraphs();
+ }
+
+  /// The probability for the first successor of an unknown subgraph
+ static constexpr double UnknownFirstSuccProbability = 0.5;
+
+private:
+ void joinIsolatedComponents() {
+ // Find blocks that are reachable from the source
+ auto Visited = std::vector<bool>(NumBlocks(), false);
+ findReachable(Func.Entry, Visited);
+
+ // Iterate over all non-reachable blocks and adjust their weights
+ for (uint64_t I = 0; I < NumBlocks(); I++) {
+ auto &Block = Func.Blocks[I];
+ if (Block.Flow > 0 && !Visited[I]) {
+ // Find a path from the entry to an exit passing through the block I
+ auto Path = findShortestPath(I);
+ // Increase the flow along the path
+ assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
+ "incorrectly computed path adjusting control flow");
+ Func.Blocks[Func.Entry].Flow += 1;
+ for (auto &Jump : Path) {
+ Jump->Flow += 1;
+ Func.Blocks[Jump->Target].Flow += 1;
+ // Update reachability
+ findReachable(Jump->Target, Visited);
+ }
+ }
+ }
+ }
+
+ /// Run BFS from a given block along the jumps with a positive flow and mark
+ /// all reachable blocks.
+ void findReachable(uint64_t Src, std::vector<bool> &Visited) {
+ if (Visited[Src])
+ return;
+ std::queue<uint64_t> Queue;
+ Queue.push(Src);
+ Visited[Src] = true;
+ while (!Queue.empty()) {
+ Src = Queue.front();
+ Queue.pop();
+ for (auto Jump : Func.Blocks[Src].SuccJumps) {
+ uint64_t Dst = Jump->Target;
+ if (Jump->Flow > 0 && !Visited[Dst]) {
+ Queue.push(Dst);
+ Visited[Dst] = true;
+ }
+ }
+ }
+ }
+
+ /// Find the shortest path from the entry block to an exit block passing
+ /// through a given block.
+ std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
+ // A path from the entry block to BlockIdx
+ auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
+ // A path from BlockIdx to an exit block
+ auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
+
+ // Concatenate the two paths
+ std::vector<FlowJump *> Result;
+ Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
+ Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
+ return Result;
+ }
+
+ /// Apply the Dijkstra algorithm to find the shortest path from a given
+ /// Source to a given Target block.
+ /// If Target == -1, then the path ends at an exit block.
+ std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
+ // Quit early, if possible
+ if (Source == Target)
+ return std::vector<FlowJump *>();
+ if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
+ return std::vector<FlowJump *>();
+
+ // Initialize data structures
+ auto Distance = std::vector<int64_t>(NumBlocks(), INF);
+ auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
+ Distance[Source] = 0;
+ std::set<std::pair<uint64_t, uint64_t>> Queue;
+ Queue.insert(std::make_pair(Distance[Source], Source));
+
+ // Run the Dijkstra algorithm
+ while (!Queue.empty()) {
+ uint64_t Src = Queue.begin()->second;
+ Queue.erase(Queue.begin());
+ // If we found a solution, quit early
+ if (Src == Target ||
+ (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
+ break;
+
+ for (auto Jump : Func.Blocks[Src].SuccJumps) {
+ uint64_t Dst = Jump->Target;
+ int64_t JumpDist = jumpDistance(Jump);
+ if (Distance[Dst] > Distance[Src] + JumpDist) {
+ Queue.erase(std::make_pair(Distance[Dst], Dst));
+
+ Distance[Dst] = Distance[Src] + JumpDist;
+ Parent[Dst] = Jump;
+
+ Queue.insert(std::make_pair(Distance[Dst], Dst));
+ }
+ }
+ }
+ // If Target is not provided, find the closest exit block
+ if (Target == AnyExitBlock) {
+ for (uint64_t I = 0; I < NumBlocks(); I++) {
+ if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
+ if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
+ Target = I;
+ }
+ }
+ }
+ }
+ assert(Parent[Target] != nullptr && "a path does not exist");
+
+ // Extract the constructed path
+ std::vector<FlowJump *> Result;
+ uint64_t Now = Target;
+ while (Now != Source) {
+ assert(Now == Parent[Now]->Target && "incorrect parent jump");
+ Result.push_back(Parent[Now]);
+ Now = Parent[Now]->Source;
+ }
+ // Reverse the path, since it is extracted from Target to Source
+ std::reverse(Result.begin(), Result.end());
+ return Result;
+ }
+
+ /// A distance of a path for a given jump.
+ /// In order to incite the path to use blocks/jumps with large positive flow,
+ /// and avoid changing branch probability of outgoing edges drastically,
+ /// set the distance as follows:
+ /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
+ /// if Block.Weight > 0, then distance = 1
+ /// otherwise distance >> 1
+ int64_t jumpDistance(FlowJump *Jump) const {
+ int64_t BaseDistance = 100;
+ if (Jump->IsUnlikely)
+ return MinCostMaxFlow::AuxCostUnlikely;
+ if (Jump->Flow > 0)
+ return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
+ if (Func.Blocks[Jump->Target].Weight > 0)
+ return BaseDistance;
+ return BaseDistance * (NumBlocks() + 1);
+ };
+
+ uint64_t NumBlocks() const { return Func.Blocks.size(); }
+
+  /// Rebalance unknown subgraphs so that each branch splits with probabilities
+ /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability
+ void rebalanceUnknownSubgraphs() {
+ assert(UnknownFirstSuccProbability >= 0.0 &&
+ UnknownFirstSuccProbability <= 1.0 &&
+ "the share of the unknown successor should be between 0 and 1");
+ // Try to find unknown subgraphs from each non-unknown block
+ for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
+ auto SrcBlock = &Func.Blocks[I];
+      // Do not attempt to find unknown successors from an unknown or a
+ // zero-flow block
+ if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+ continue;
+
+ std::vector<FlowBlock *> UnknownSuccs;
+ FlowBlock *DstBlock = nullptr;
+      // Find an unknown subgraph starting at block SrcBlock
+ if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))
+ continue;
+ // At the moment, we do not rebalance subgraphs containing cycles among
+ // unknown blocks
+ if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))
+ continue;
+
+ // Rebalance the flow
+ rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);
+ }
+ }
+
+  /// Find an unknown subgraph starting at block SrcBlock.
+ /// If the search is successful, the method sets DstBlock and UnknownSuccs.
+ bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock,
+ std::vector<FlowBlock *> &UnknownSuccs) {
+ // Run BFS from SrcBlock and make sure all paths are going through unknown
+ // blocks and end at a non-unknown DstBlock
+ auto Visited = std::vector<bool>(NumBlocks(), false);
+ std::queue<uint64_t> Queue;
+ DstBlock = nullptr;
+
+ Queue.push(SrcBlock->Index);
+ Visited[SrcBlock->Index] = true;
+ while (!Queue.empty()) {
+ auto &Block = Func.Blocks[Queue.front()];
+ Queue.pop();
+ // Process blocks reachable from Block
+ for (auto Jump : Block.SuccJumps) {
+ uint64_t Dst = Jump->Target;
+ if (Visited[Dst])
+ continue;
+ Visited[Dst] = true;
+ if (!Func.Blocks[Dst].UnknownWeight) {
+          // If we see a non-unique non-unknown block reachable from SrcBlock,
+ // stop processing and skip rebalancing
+ FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
+ if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
+ return false;
+ DstBlock = CandidateDstBlock;
+ } else {
+ Queue.push(Dst);
+ UnknownSuccs.push_back(&Func.Blocks[Dst]);
+ }
+ }
+ }
+
+ // If the list of unknown blocks is empty, we don't need rebalancing
+ if (UnknownSuccs.empty())
+ return false;
+ // If all reachable nodes from SrcBlock are unknown, skip rebalancing
+ if (DstBlock == nullptr)
+ return false;
+ // If any of the unknown blocks is an exit block, skip rebalancing
+ for (auto Block : UnknownSuccs) {
+ if (Block->isExit())
+ return false;
+ }
+
+ return true;
+ }
+
+ /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
+ /// UnknownSuccs in the topological order (so that all jumps are "forward").
+ bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
+ std::vector<FlowBlock *> &UnknownSuccs) {
+ // Extract local in-degrees in the considered subgraph
+ auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
+ for (auto Jump : SrcBlock->SuccJumps) {
+ LocalInDegree[Jump->Target]++;
+ }
+ for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {
+ for (auto Jump : UnknownSuccs[I]->SuccJumps) {
+ LocalInDegree[Jump->Target]++;
+ }
+ }
+ // A loop containing SrcBlock
+ if (LocalInDegree[SrcBlock->Index] > 0)
+ return false;
+
+ std::vector<FlowBlock *> AcyclicOrder;
+ std::queue<uint64_t> Queue;
+ Queue.push(SrcBlock->Index);
+ while (!Queue.empty()) {
+ auto &Block = Func.Blocks[Queue.front()];
+ Queue.pop();
+ // Stop propagation once we reach DstBlock
+ if (Block.Index == DstBlock->Index)
+ break;
+
+ AcyclicOrder.push_back(&Block);
+ // Add to the queue all successors with zero local in-degree
+ for (auto Jump : Block.SuccJumps) {
+ uint64_t Dst = Jump->Target;
+ LocalInDegree[Dst]--;
+ if (LocalInDegree[Dst] == 0) {
+ Queue.push(Dst);
+ }
+ }
+ }
+
+ // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
+ // of all blocks
+ if (UnknownSuccs.size() + 1 != AcyclicOrder.size())
+ return false;
+ UnknownSuccs = AcyclicOrder;
+ return true;
+ }
+
+ /// Rebalance a given subgraph.
+ void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
+ std::vector<FlowBlock *> &UnknownSuccs) {
+ assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
+ assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");
+
+ for (auto Block : UnknownSuccs) {
+ // Block's flow is the sum of incoming flows
+ uint64_t TotalFlow = 0;
+ if (Block == SrcBlock) {
+ TotalFlow = Block->Flow;
+ } else {
+ for (auto Jump : Block->PredJumps) {
+ TotalFlow += Jump->Flow;
+ }
+ Block->Flow = TotalFlow;
+ }
+
+ // Process all successor jumps and update corresponding flow values
+ for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {
+ auto Jump = Block->SuccJumps[I];
+ if (I + 1 == Block->SuccJumps.size()) {
+ Jump->Flow = TotalFlow;
+ continue;
+ }
+ uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);
+ Jump->Flow = Flow;
+ TotalFlow -= Flow;
+ }
+ }
+ }
+
+ /// A constant indicating an arbitrary exit block of a function.
+ static constexpr uint64_t AnyExitBlock = uint64_t(-1);
+
+ /// The function.
+ FlowFunction &Func;
+};
+
/// Initializing flow network for a given function.
///
/// Every block is split into three nodes that are responsible for (i) an
@@ -440,6 +786,39 @@ void verifyWeights(const FlowFunction &Func) {
}
}
assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
+
+ // Verify that there are no isolated flow components
+ // One could modify FlowFunction to hold edges indexed by the sources, which
+  // would avoid the creation of this object
+ auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
+ for (auto &Jump : Func.Jumps) {
+ if (Jump.Flow > 0) {
+ PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
+ }
+ }
+
+ // Run BFS from the source along edges with positive flow
+ std::queue<uint64_t> Queue;
+ auto Visited = std::vector<bool>(NumBlocks, false);
+ Queue.push(Func.Entry);
+ Visited[Func.Entry] = true;
+ while (!Queue.empty()) {
+ uint64_t Src = Queue.front();
+ Queue.pop();
+ for (uint64_t Dst : PositiveFlowEdges[Src]) {
+ if (!Visited[Dst]) {
+ Queue.push(Dst);
+ Visited[Dst] = true;
+ }
+ }
+ }
+
+ // Verify that every block that has a positive flow is reached from the source
+ // along edges with a positive flow
+ for (uint64_t I = 0; I < NumBlocks; I++) {
+ auto &Block = Func.Blocks[I];
+ assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
+ }
}
#endif
@@ -455,6 +834,10 @@ void llvm::applyFlowInference(FlowFunction &Func) {
// Extract flow values for every block and every edge
extractWeights(InferenceNetwork, Func);
+ // Post-processing adjustments to the flow
+ auto Adjuster = FlowAdjuster(Func);
+ Adjuster.run();
+
#ifndef NDEBUG
// Verify the result
verifyWeights(Func);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 71c15d5c51fc..c840ee85795f 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1047,9 +1047,9 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
if (SE.DT.dominates(IncV, InsertPos))
break;
}
- for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
- fixupInsertPoints(*I);
- (*I)->moveBefore(InsertPos);
+ for (Instruction *I : llvm::reverse(IVIncs)) {
+ fixupInsertPoints(I);
+ I->moveBefore(InsertPos);
}
return true;
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index afa3ecde77f9..1046998c26de 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3629,7 +3629,7 @@ static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
return false; // TODO
// Use lambda to lazily compute expensive condition after cheap ones.
auto NoSideEffects = [](BasicBlock &BB) {
- return !llvm::any_of(BB, [](const Instruction &I) {
+ return llvm::none_of(BB, [](const Instruction &I) {
return I.mayWriteToMemory() || I.mayHaveSideEffects();
});
};
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index e190a1294eb3..02727a3dbf9c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -193,6 +193,19 @@ static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> A
}
}
+// Copy CallInst "flags" like musttail, notail, and tail. Return New param for
+// easier chaining. Calls to emit* and B.createCall should probably be wrapped
+// in this function when New is created to replace Old. Callers should take
+// care to check Old.isMustTailCall() if they aren't replacing Old directly
+// with New.
+static Value *copyFlags(const CallInst &Old, Value *New) {
+ assert(!Old.isMustTailCall() && "do not copy musttail call flags");
+ assert(!Old.isNoTailCall() && "do not copy notail call flags");
+ if (auto *NewCI = dyn_cast_or_null<CallInst>(New))
+ NewCI->setTailCallKind(Old.getTailCallKind());
+ return New;
+}
+
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -215,7 +228,7 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
if (Len == 0)
return Dst;
- return emitStrLenMemCpy(Src, Dst, Len, B);
+ return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, Len, B));
}
Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
@@ -279,7 +292,7 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
// strncat(x, s, c) -> strcat(x, s)
// s is constant so the strcat can be optimized further.
- return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+ return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
}
Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
@@ -300,9 +313,11 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
- return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
- B, DL, TLI);
+ return copyFlags(
+ *CI,
+ emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B,
+ DL, TLI));
}
// Otherwise, the character is a constant, see if the first argument is
@@ -340,7 +355,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (CharC->isZero())
- return emitStrChr(SrcStr, '\0', B, TLI);
+ return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
return nullptr;
}
@@ -385,25 +400,28 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
annotateDereferenceableBytes(CI, 1, Len2);
if (Len1 && Len2) {
- return emitMemCmp(Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()),
- std::min(Len1, Len2)),
- B, DL, TLI);
+ return copyFlags(
+ *CI, emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ std::min(Len1, Len2)),
+ B, DL, TLI));
}
// strcmp to memcmp
if (!HasStr1 && HasStr2) {
if (canTransformToMemCmp(CI, Str1P, Len2, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
- TLI);
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
+ B, DL, TLI));
} else if (HasStr1 && !HasStr2) {
if (canTransformToMemCmp(CI, Str2P, Len1, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
- TLI);
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
+ B, DL, TLI));
}
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
@@ -430,7 +448,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
return ConstantInt::get(CI->getType(), 0);
if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI);
+ return copyFlags(*CI, emitMemCmp(Str1P, Str2P, Size, B, DL, TLI));
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -462,17 +480,19 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
if (!HasStr1 && HasStr2) {
Len2 = std::min(Len2, Length);
if (canTransformToMemCmp(CI, Str1P, Len2, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
- TLI);
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
+ B, DL, TLI));
} else if (HasStr1 && !HasStr2) {
Len1 = std::min(Len1, Length);
if (canTransformToMemCmp(CI, Str2P, Len1, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
- TLI);
+ return copyFlags(
+ *CI,
+ emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
+ B, DL, TLI));
}
return nullptr;
@@ -485,7 +505,7 @@ Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
if (SrcLen && Size) {
annotateDereferenceableBytes(CI, 0, SrcLen);
if (SrcLen <= Size->getZExtValue() + 1)
- return emitStrDup(Src, B, TLI);
+ return copyFlags(*CI, emitStrDup(Src, B, TLI));
}
return nullptr;
@@ -495,7 +515,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
-
+
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -511,6 +531,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return Dst;
}
@@ -520,7 +541,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
// stpcpy(d,s) -> strcpy(d,s) if the result is not used.
if (CI->use_empty())
- return emitStrCpy(Dst, Src, B, TLI);
+ return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = emitStrLen(Src, B, DL, TLI);
@@ -544,6 +565,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return DstEnd;
}
@@ -583,6 +605,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
CI->getContext(), 0, ArgAttrs));
+ copyFlags(*CI, NewCI);
return Dst;
}
@@ -606,6 +629,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
ConstantInt::get(DL.getIntPtrType(PT), Len));
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return Dst;
}
@@ -737,7 +761,7 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
// strpbrk(s, "a") -> strchr(s, 'a')
if (HasS2 && S2.size() == 1)
- return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
+ return copyFlags(*CI, emitStrChr(CI->getArgOperand(0), S2[0], B, TLI));
return nullptr;
}
@@ -793,7 +817,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
// strcspn(s, "") -> strlen(s)
if (HasS2 && S2.empty())
- return emitStrLen(CI->getArgOperand(0), B, DL, TLI);
+ return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, DL, TLI));
return nullptr;
}
@@ -1062,7 +1086,7 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
Value *LHS = CI->getArgOperand(0);
Value *RHS = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
- return emitBCmp(LHS, RHS, Size, B, DL, TLI);
+ return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
}
return nullptr;
@@ -1083,6 +1107,7 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return CI->getArgOperand(0);
}
@@ -1110,7 +1135,8 @@ Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
if (Pos == StringRef::npos) {
if (N->getZExtValue() <= SrcStr.size()) {
- B.CreateMemCpy(Dst, Align(1), Src, Align(1), CI->getArgOperand(3));
+ copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1),
+ CI->getArgOperand(3)));
return Constant::getNullValue(CI->getType());
}
return nullptr;
@@ -1119,7 +1145,7 @@ Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
Value *NewN =
ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
// memccpy -> llvm.memcpy
- B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN);
+ copyFlags(*CI, B.CreateMemCpy(Dst, Align(1), Src, Align(1), NewN));
return Pos + 1 <= N->getZExtValue()
? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
: Constant::getNullValue(CI->getType());
@@ -1136,6 +1162,7 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
// TODO: Attach return value attributes to the 1st operand to preserve them?
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
}
@@ -1150,6 +1177,7 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return CI->getArgOperand(0);
}
@@ -1164,12 +1192,13 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return CI->getArgOperand(0);
}
Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
- return emitMalloc(CI->getArgOperand(1), B, DL, TLI);
+ return copyFlags(*CI, emitMalloc(CI->getArgOperand(1), B, DL, TLI));
return nullptr;
}
@@ -1190,7 +1219,7 @@ static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
CallInst *NewCall = B.CreateCall(F, V);
NewCall->takeName(CI);
- return NewCall;
+ return copyFlags(*CI, NewCall);
}
/// Return a variant of Val with float type.
@@ -1311,7 +1340,8 @@ Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt,
CI->getType());
- return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
+ return copyFlags(
+ *CI, B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"));
}
static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
@@ -1334,14 +1364,16 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
// sin(-X) --> -sin(X)
// tan(-X) --> -tan(X)
if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
- return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X));
+ return B.CreateFNeg(
+ copyFlags(*Call, B.CreateCall(Call->getCalledFunction(), X)));
break;
case LibFunc_cos:
case LibFunc_cosf:
case LibFunc_cosl:
// cos(-X) --> cos(X)
if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
- return B.CreateCall(Call->getCalledFunction(), X, "cos");
+ return copyFlags(*Call,
+ B.CreateCall(Call->getCalledFunction(), X, "cos"));
break;
default:
break;
@@ -1476,9 +1508,10 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
(isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
- return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI,
- LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
- B, Attrs);
+ return copyFlags(*Pow,
+ emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI,
+ TLI, LibFunc_ldexp, LibFunc_ldexpf,
+ LibFunc_ldexpl, B, Attrs));
}
// pow(2.0 ** n, x) -> exp2(n * x)
@@ -1496,11 +1529,13 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
if (Pow->doesNotAccessMemory())
- return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
- FMul, "exp2");
+ return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
+ Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2"));
else
- return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l, B, Attrs);
+ return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
+ LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs));
}
}
@@ -1508,8 +1543,9 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
// TODO: There is no exp10() intrinsic yet, but some day there shall be one.
if (match(Base, m_SpecificFP(10.0)) &&
hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
- return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
- LibFunc_exp10l, B, Attrs);
+ return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
+ LibFunc_exp10f, LibFunc_exp10l,
+ B, Attrs));
// pow(x, y) -> exp2(log2(x) * y)
if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
@@ -1528,11 +1564,13 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
if (Log) {
Value *FMul = B.CreateFMul(Log, Expo, "mul");
if (Pow->doesNotAccessMemory())
- return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
- FMul, "exp2");
+ return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
+ Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2"));
else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l))
- return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l, B, Attrs);
+ return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
+ LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs));
}
}
@@ -1595,6 +1633,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
}
+ Sqrt = copyFlags(*Pow, Sqrt);
+
// Handle non finite base by expanding to
// (x == -infinity ? +infinity : sqrt(x)).
if (!Pow->hasNoInfs()) {
@@ -1721,15 +1761,18 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
if (ExpoF->isInteger() &&
ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
APFloat::opOK) {
- return createPowWithIntegerExponent(
- Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B);
+ return copyFlags(
+ *Pow,
+ createPowWithIntegerExponent(
+ Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
+ M, B));
}
}
// powf(x, itofp(y)) -> powi(x, y)
if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
- return createPowWithIntegerExponent(Base, ExpoI, M, B);
+ return copyFlags(*Pow, createPowWithIntegerExponent(Base, ExpoI, M, B));
}
// Shrink pow() to powf() if the arguments are single precision,
@@ -1792,7 +1835,8 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
: Intrinsic::maxnum;
Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
- return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
+ return copyFlags(
+ *CI, B.CreateCall(F, {CI->getArgOperand(0), CI->getArgOperand(1)}));
}
Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
@@ -2010,9 +2054,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
// of the square root calculation.
Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
- return B.CreateFMul(FabsCall, SqrtCall);
+ return copyFlags(*CI, B.CreateFMul(FabsCall, SqrtCall));
}
- return FabsCall;
+ return copyFlags(*CI, FabsCall);
}
// TODO: Generalize to handle any trig function and its inverse.
@@ -2327,7 +2371,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
// printf("x") -> putchar('x'), even for "%" and "%%".
if (FormatStr.size() == 1 || FormatStr == "%%")
- return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
+ return copyFlags(*CI, emitPutChar(B.getInt32(FormatStr[0]), B, TLI));
// Try to remove call or emit putchar/puts.
if (FormatStr == "%s" && CI->arg_size() > 1) {
@@ -2339,12 +2383,12 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
return (Value *)CI;
// printf("%s", "a") --> putchar('a')
if (OperandStr.size() == 1)
- return emitPutChar(B.getInt32(OperandStr[0]), B, TLI);
+ return copyFlags(*CI, emitPutChar(B.getInt32(OperandStr[0]), B, TLI));
// printf("%s", str"\n") --> puts(str)
if (OperandStr.back() == '\n') {
OperandStr = OperandStr.drop_back();
Value *GV = B.CreateGlobalString(OperandStr, "str");
- return emitPutS(GV, B, TLI);
+ return copyFlags(*CI, emitPutS(GV, B, TLI));
}
return nullptr;
}
@@ -2356,19 +2400,19 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- return emitPutS(GV, B, TLI);
+ return copyFlags(*CI, emitPutS(GV, B, TLI));
}
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->arg_size() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy())
- return emitPutChar(CI->getArgOperand(1), B, TLI);
+ return copyFlags(*CI, emitPutChar(CI->getArgOperand(1), B, TLI));
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy())
- return emitPutS(CI->getArgOperand(1), B, TLI);
+ return copyFlags(*CI, emitPutS(CI->getArgOperand(1), B, TLI));
return nullptr;
}
@@ -2459,7 +2503,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
if (CI->use_empty())
// sprintf(dest, "%s", str) -> strcpy(dest, str)
- return emitStrCpy(Dest, CI->getArgOperand(2), B, TLI);
+ return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI));
uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
if (SrcLen) {
@@ -2558,10 +2602,12 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
// snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
// strlen(fmt)+1)
- B.CreateMemCpy(
- CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
- ConstantInt::get(DL.getIntPtrType(CI->getContext()),
- FormatStr.size() + 1)); // Copy the null byte.
+ copyFlags(
+ *CI,
+ B.CreateMemCpy(
+ CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1))); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -2599,8 +2645,10 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
else if (N < Str.size() + 1)
return nullptr;
- B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(3),
- Align(1), ConstantInt::get(CI->getType(), Str.size() + 1));
+ copyFlags(
+ *CI, B.CreateMemCpy(CI->getArgOperand(0), Align(1),
+ CI->getArgOperand(3), Align(1),
+ ConstantInt::get(CI->getType(), Str.size() + 1)));
// The snprintf result is the unincremented number of bytes in the string.
return ConstantInt::get(CI->getType(), Str.size());
@@ -2640,10 +2688,11 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
if (FormatStr.contains('%'))
return nullptr; // We found a format specifier.
- return emitFWrite(
- CI->getArgOperand(1),
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
- CI->getArgOperand(0), B, DL, TLI);
+ return copyFlags(
+ *CI, emitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, DL, TLI));
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -2656,14 +2705,16 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
- return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+ return copyFlags(
+ *CI, emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+ return copyFlags(
+ *CI, emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI));
}
return nullptr;
}
@@ -2750,10 +2801,11 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
return nullptr;
// Known to have no uses (see above).
- return emitFWrite(
- CI->getArgOperand(0),
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
- CI->getArgOperand(1), B, DL, TLI);
+ return copyFlags(
+ *CI,
+ emitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
+ CI->getArgOperand(1), B, DL, TLI));
}
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
@@ -2765,15 +2817,16 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
// puts("") -> putchar('\n')
StringRef Str;
if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty())
- return emitPutChar(B.getInt32('\n'), B, TLI);
+ return copyFlags(*CI, emitPutChar(B.getInt32('\n'), B, TLI));
return nullptr;
}
Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
// bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
- return B.CreateMemMove(CI->getArgOperand(1), Align(1), CI->getArgOperand(0),
- Align(1), CI->getArgOperand(2));
+ return copyFlags(*CI, B.CreateMemMove(CI->getArgOperand(1), Align(1),
+ CI->getArgOperand(0), Align(1),
+ CI->getArgOperand(2)));
}
bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
@@ -2971,6 +3024,8 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
+ assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
+
// TODO: Split out the code below that operates on FP calls so that
// we can allow all non-FP calls with the StrictFP attribute to be
// optimized.
@@ -3212,6 +3267,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return CI->getArgOperand(0);
}
return nullptr;
@@ -3225,6 +3281,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return CI->getArgOperand(0);
}
return nullptr;
@@ -3238,6 +3295,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
CI->getArgOperand(2), Align(1));
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
+ copyFlags(*CI, NewCI);
return CI->getArgOperand(0);
}
return nullptr;
@@ -3252,7 +3310,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
CallInst *NewCI = cast<CallInst>(Call);
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
- return NewCI;
+ return copyFlags(*CI, NewCI);
}
return nullptr;
}
@@ -3277,9 +3335,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// string lengths for varying.
if (isFortifiedCallFoldable(CI, 2, None, 1)) {
if (Func == LibFunc_strcpy_chk)
- return emitStrCpy(Dst, Src, B, TLI);
+ return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI));
else
- return emitStpCpy(Dst, Src, B, TLI);
+ return copyFlags(*CI, emitStpCpy(Dst, Src, B, TLI));
}
if (OnlyLowerUnknownSize)
@@ -3303,14 +3361,14 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// a __memcpy_chk, we still need to return the correct end pointer.
if (Ret && Func == LibFunc_stpcpy_chk)
return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
- return Ret;
+ return copyFlags(*CI, cast<CallInst>(Ret));
}
Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 1, None, 0))
- return emitStrLen(CI->getArgOperand(0), B, CI->getModule()->getDataLayout(),
- TLI);
+ return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B,
+ CI->getModule()->getDataLayout(), TLI));
return nullptr;
}
@@ -3319,11 +3377,13 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
LibFunc Func) {
if (isFortifiedCallFoldable(CI, 3, 2)) {
if (Func == LibFunc_strncpy_chk)
- return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
+ return copyFlags(*CI,
+ emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
else
- return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
+ return copyFlags(*CI,
+ emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
}
return nullptr;
@@ -3332,8 +3392,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 4, 3))
- return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
+ return copyFlags(
+ *CI, emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3), B, TLI));
return nullptr;
}
@@ -3342,8 +3403,9 @@ Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 5));
- return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(4), VariadicArgs, B, TLI);
+ return copyFlags(*CI,
+ emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), VariadicArgs, B, TLI));
}
return nullptr;
@@ -3353,8 +3415,9 @@ Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
SmallVector<Value *, 8> VariadicArgs(drop_begin(CI->args(), 4));
- return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
- B, TLI);
+ return copyFlags(*CI,
+ emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+ VariadicArgs, B, TLI));
}
return nullptr;
@@ -3363,7 +3426,8 @@ Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2))
- return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
+ return copyFlags(
+ *CI, emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI));
return nullptr;
}
@@ -3371,8 +3435,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3))
- return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
+ return copyFlags(*CI,
+ emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
return nullptr;
}
@@ -3380,8 +3445,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3))
- return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
+ return copyFlags(*CI,
+ emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
return nullptr;
}
@@ -3389,8 +3455,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3))
- return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
+ return copyFlags(*CI,
+ emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI));
return nullptr;
}
@@ -3398,8 +3465,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
- return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
+ return copyFlags(
+ *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), CI->getArgOperand(5), B, TLI));
return nullptr;
}
@@ -3407,8 +3475,9 @@ Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
IRBuilderBase &B) {
if (isFortifiedCallFoldable(CI, 2, None, None, 1))
- return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
- CI->getArgOperand(4), B, TLI);
+ return copyFlags(*CI,
+ emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+ CI->getArgOperand(4), B, TLI));
return nullptr;
}
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index c3eafd6b2492..b822db938af8 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -450,6 +450,12 @@ Value *Mapper::mapValue(const Value *V) {
DSOLocalEquivalent::get(Func), NewTy);
}
+ if (const auto *NC = dyn_cast<NoCFIValue>(C)) {
+ auto *Val = mapValue(NC->getGlobalValue());
+ GlobalValue *GV = cast<GlobalValue>(Val);
+ return getVM()[NC] = NoCFIValue::get(GV);
+ }
+
auto mapValueOrNull = [this](Value *V) {
auto Mapped = mapValue(V);
assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) &&
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 805011191da0..81e5aa223c07 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -55,22 +55,23 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
cl::desc("The maximum number of SCEV checks allowed with a "
"vectorize(enable) pragma"));
-// FIXME: When scalable vectorization is stable enough, change the default
-// to SK_PreferFixedWidth.
-static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
- "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
- cl::Hidden,
- cl::desc("Control whether the compiler can use scalable vectors to "
- "vectorize a loop"),
- cl::values(
- clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
- "Scalable vectorization is disabled."),
- clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
- "Scalable vectorization is available, but favor fixed-width "
- "vectorization when the cost is inconclusive."),
- clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
- "Scalable vectorization is available and favored when the "
- "cost is inconclusive.")));
+static cl::opt<LoopVectorizeHints::ScalableForceKind>
+ ForceScalableVectorization(
+ "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
+ cl::Hidden,
+ cl::desc("Control whether the compiler can use scalable vectors to "
+ "vectorize a loop"),
+ cl::values(
+ clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
+ "Scalable vectorization is disabled."),
+ clEnumValN(
+ LoopVectorizeHints::SK_PreferScalable, "preferred",
+ "Scalable vectorization is available and favored when the "
+ "cost is inconclusive."),
+ clEnumValN(
+ LoopVectorizeHints::SK_PreferScalable, "on",
+ "Scalable vectorization is available and favored when the "
+ "cost is inconclusive.")));
/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;
@@ -95,7 +96,8 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
bool InterleaveOnlyWhenForced,
- OptimizationRemarkEmitter &ORE)
+ OptimizationRemarkEmitter &ORE,
+ const TargetTransformInfo *TTI)
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
@@ -110,14 +112,32 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
if (VectorizerParams::isInterleaveForced())
Interleave.Value = VectorizerParams::VectorizationInterleave;
+ // If the metadata doesn't explicitly specify whether to enable scalable
+ // vectorization, then decide based on the following criteria (increasing
+ // level of priority):
+ // - Target default
+ // - Metadata width
+ // - Force option (always overrides)
+ if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
+ if (TTI)
+ Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
+ : SK_FixedWidthOnly;
+
+ if (Width.Value)
+ // If the width is set, but the metadata says nothing about the scalable
+ // property, then assume it concerns only a fixed-width UserVF.
+ // If width is not set, the flag takes precedence.
+ Scalable.Value = SK_FixedWidthOnly;
+ }
+
+ // If the flag is set to force any use of scalable vectors, override the loop
+ // hints.
+ if (ForceScalableVectorization.getValue() !=
+ LoopVectorizeHints::SK_Unspecified)
+ Scalable.Value = ForceScalableVectorization.getValue();
+
+ // Scalable vectorization is disabled if no preference is specified.
if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
- // If the width is set, but the metadata says nothing about the scalable
- // property, then assume it concerns only a fixed-width UserVF.
- // If width is not set, the flag takes precedence.
- Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
- else if (ScalableVectorization == SK_FixedWidthOnly)
- // If the flag is set to disable any use of scalable vectors, override the
- // loop hint.
Scalable.Value = SK_FixedWidthOnly;
if (IsVectorized.Value != 1)
@@ -929,7 +949,7 @@ bool LoopVectorizationLegality::canVectorizeFPMath(
}));
}
-bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
+bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
Value *In0 = const_cast<Value *>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
if (!PN)
@@ -938,16 +958,29 @@ bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
return Inductions.count(PN);
}
-bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
+const InductionDescriptor *
+LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
+ if (!isInductionPhi(Phi))
+ return nullptr;
+ auto &ID = getInductionVars().find(Phi)->second;
+ if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
+ ID.getKind() == InductionDescriptor::IK_FpInduction)
+ return &ID;
+ return nullptr;
+}
+
+bool LoopVectorizationLegality::isCastedInductionVariable(
+ const Value *V) const {
auto *Inst = dyn_cast<Instruction>(V);
return (Inst && InductionCastsToIgnore.count(Inst));
}
-bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
return isInductionPhi(V) || isCastedInductionVariable(V);
}
-bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
+bool LoopVectorizationLegality::isFirstOrderRecurrence(
+ const PHINode *Phi) const {
return FirstOrderRecurrences.count(Phi);
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index a7d6609f8c56..71eb39a18d2f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -45,16 +45,17 @@ class VPBuilder {
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
VPInstruction *createInstruction(unsigned Opcode,
- ArrayRef<VPValue *> Operands) {
- VPInstruction *Instr = new VPInstruction(Opcode, Operands);
+ ArrayRef<VPValue *> Operands, DebugLoc DL) {
+ VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL);
if (BB)
BB->insert(Instr, InsertPt);
return Instr;
}
VPInstruction *createInstruction(unsigned Opcode,
- std::initializer_list<VPValue *> Operands) {
- return createInstruction(Opcode, ArrayRef<VPValue *>(Operands));
+ std::initializer_list<VPValue *> Operands,
+ DebugLoc DL) {
+ return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL);
}
public:
@@ -123,30 +124,33 @@ public:
/// its underlying Instruction.
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
Instruction *Inst = nullptr) {
- VPInstruction *NewVPInst = createInstruction(Opcode, Operands);
+ DebugLoc DL;
+ if (Inst)
+ DL = Inst->getDebugLoc();
+ VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL);
NewVPInst->setUnderlyingValue(Inst);
return NewVPInst;
}
- VPValue *createNaryOp(unsigned Opcode,
- std::initializer_list<VPValue *> Operands,
- Instruction *Inst = nullptr) {
- return createNaryOp(Opcode, ArrayRef<VPValue *>(Operands), Inst);
+ VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
+ DebugLoc DL) {
+ return createInstruction(Opcode, Operands, DL);
}
- VPValue *createNot(VPValue *Operand) {
- return createInstruction(VPInstruction::Not, {Operand});
+ VPValue *createNot(VPValue *Operand, DebugLoc DL) {
+ return createInstruction(VPInstruction::Not, {Operand}, DL);
}
- VPValue *createAnd(VPValue *LHS, VPValue *RHS) {
- return createInstruction(Instruction::BinaryOps::And, {LHS, RHS});
+ VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL) {
+ return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL);
}
- VPValue *createOr(VPValue *LHS, VPValue *RHS) {
- return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
+ VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL) {
+ return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL);
}
- VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
- return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
+ VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
+ DebugLoc DL) {
+ return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL);
}
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5ca0adb4242c..4747f34fcc62 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -428,6 +428,8 @@ class GeneratedRTChecks;
namespace llvm {
+AnalysisKey ShouldRunExtraVectorPasses::Key;
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -506,8 +508,8 @@ public:
/// Widen an integer or floating-point induction variable \p IV. If \p Trunc
/// is provided, the integer induction variable will first be truncated to
/// the corresponding type.
- void widenIntOrFpInduction(PHINode *IV, Value *Start, TruncInst *Trunc,
- VPValue *Def, VPValue *CastDef,
+ void widenIntOrFpInduction(PHINode *IV, const InductionDescriptor &ID,
+ Value *Start, TruncInst *Trunc, VPValue *Def,
VPTransformState &State);
/// Construct the vector value of a scalarized value \p V one lane at a time.
@@ -534,7 +536,7 @@ public:
/// Returns true if the reordering of FP operations is not allowed, but we are
/// able to vectorize with strict in-order reductions for the given RdxDesc.
- bool useOrderedReductions(RecurrenceDescriptor &RdxDesc);
+ bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc);
/// Create a broadcast instruction. This method generates a broadcast
/// instruction (shuffle) for loop invariant values and for the induction
@@ -619,7 +621,7 @@ protected:
/// can also be a truncate instruction.
void buildScalarSteps(Value *ScalarIV, Value *Step, Instruction *EntryVal,
const InductionDescriptor &ID, VPValue *Def,
- VPValue *CastDef, VPTransformState &State);
+ VPTransformState &State);
/// Create a vector induction phi node based on an existing scalar one. \p
/// EntryVal is the value from the original loop that maps to the vector phi
@@ -629,7 +631,6 @@ protected:
void createVectorIntOrFpInductionPHI(const InductionDescriptor &II,
Value *Step, Value *Start,
Instruction *EntryVal, VPValue *Def,
- VPValue *CastDef,
VPTransformState &State);
/// Returns true if an instruction \p I should be scalarized instead of
@@ -639,29 +640,6 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
- /// If there is a cast involved in the induction variable \p ID, which should
- /// be ignored in the vectorized loop body, this function records the
- /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
- /// cast. We had already proved that the casted Phi is equal to the uncasted
- /// Phi in the vectorized loop (under a runtime guard), and therefore
- /// there is no need to vectorize the cast - the same value can be used in the
- /// vector loop for both the Phi and the cast.
- /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,
- /// Otherwise, \p VectorLoopValue is a widened/vectorized value.
- ///
- /// \p EntryVal is the value from the original loop that maps to the vector
- /// phi node and is used to distinguish what is the IV currently being
- /// processed - original one (if \p EntryVal is a phi corresponding to the
- /// original IV) or the "newly-created" one based on the proof mentioned above
- /// (see also buildScalarSteps() and createVectorIntOrFPInductionPHI()). In the
- /// latter case \p EntryVal is a TruncInst and we must not record anything for
- /// that IV, but it's error-prone to expect callers of this routine to care
- /// about that, hence this explicit parameter.
- void recordVectorLoopValueForInductionCast(
- const InductionDescriptor &ID, const Instruction *EntryVal,
- Value *VectorLoopValue, VPValue *CastDef, VPTransformState &State,
- unsigned Part, unsigned Lane = UINT_MAX);
-
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
@@ -698,7 +676,8 @@ protected:
/// flags, which can be found from the original scalar operations.
Value *emitTransformedIndex(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
const DataLayout &DL,
- const InductionDescriptor &ID) const;
+ const InductionDescriptor &ID,
+ BasicBlock *VectorHeader) const;
/// Emit basic blocks (prefixed with \p Prefix) for the iteration check,
/// vector loop preheader, middle block and scalar preheader. Also
@@ -1728,7 +1707,8 @@ private:
/// disabled or unsupported, then the scalable part will be equal to
/// ElementCount::getScalable(0).
FixedScalableVFPair computeFeasibleMaxVF(unsigned ConstTripCount,
- ElementCount UserVF);
+ ElementCount UserVF,
+ bool FoldTailByMasking);
/// \return the maximized element count based on the targets vector
/// registers and the loop trip-count, but limited to a maximum safe VF.
@@ -1741,7 +1721,8 @@ private:
ElementCount getMaximizedVFForTarget(unsigned ConstTripCount,
unsigned SmallestType,
unsigned WidestType,
- const ElementCount &MaxSafeVF);
+ const ElementCount &MaxSafeVF,
+ bool FoldTailByMasking);
/// \return the maximum legal scalable VF, based on the safe max number
/// of elements.
@@ -2356,8 +2337,8 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
const InductionDescriptor &II, Value *Step, Value *Start,
- Instruction *EntryVal, VPValue *Def, VPValue *CastDef,
- VPTransformState &State) {
+ Instruction *EntryVal, VPValue *Def, VPTransformState &State) {
+ IRBuilder<> &Builder = State.Builder;
assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
"Expected either an induction phi-node or a truncate of it!");
@@ -2373,7 +2354,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
}
Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
- Value *SplatStart = Builder.CreateVectorSplat(VF, Start);
+ Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
Value *SteppedStart =
getStepVector(SplatStart, Zero, Step, II.getInductionOpcode());
@@ -2394,9 +2375,9 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
Type *StepType = Step->getType();
Value *RuntimeVF;
if (Step->getType()->isFloatingPointTy())
- RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, VF);
+ RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
else
- RuntimeVF = getRuntimeVF(Builder, StepType, VF);
+ RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
// Create a vector splat to use in the induction update.
@@ -2405,8 +2386,8 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
// IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
// handle a constant vector splat.
Value *SplatVF = isa<Constant>(Mul)
- ? ConstantVector::getSplat(VF, cast<Constant>(Mul))
- : Builder.CreateVectorSplat(VF, Mul);
+ ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
+ : Builder.CreateVectorSplat(State.VF, Mul);
Builder.restoreIP(CurrIP);
// We may need to add the step a number of times, depending on the unroll
@@ -2420,8 +2401,6 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
- recordVectorLoopValueForInductionCast(II, EntryVal, LastInduction, CastDef,
- State, Part);
LastInduction = cast<Instruction>(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
@@ -2455,56 +2434,21 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
return llvm::any_of(IV->users(), isScalarInst);
}
-void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
- const InductionDescriptor &ID, const Instruction *EntryVal,
- Value *VectorLoopVal, VPValue *CastDef, VPTransformState &State,
- unsigned Part, unsigned Lane) {
- assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
- "Expected either an induction phi-node or a truncate of it!");
-
- // This induction variable is not the phi from the original loop but the
- // newly-created IV based on the proof that casted Phi is equal to the
- // uncasted Phi in the vectorized loop (under a runtime guard possibly). It
- // re-uses the same InductionDescriptor that original IV uses but we don't
- // have to do any recording in this case - that is done when original IV is
- // processed.
- if (isa<TruncInst>(EntryVal))
- return;
-
- if (!CastDef) {
- assert(ID.getCastInsts().empty() &&
- "there are casts for ID, but no CastDef");
- return;
- }
- assert(!ID.getCastInsts().empty() &&
- "there is a CastDef, but no casts for ID");
- // Only the first Cast instruction in the Casts vector is of interest.
- // The rest of the Casts (if exist) have no uses outside the
- // induction update chain itself.
- if (Lane < UINT_MAX)
- State.set(CastDef, VectorLoopVal, VPIteration(Part, Lane));
- else
- State.set(CastDef, VectorLoopVal, Part);
-}
-
-void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
- TruncInst *Trunc, VPValue *Def,
- VPValue *CastDef,
+void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV,
+ const InductionDescriptor &ID,
+ Value *Start, TruncInst *Trunc,
+ VPValue *Def,
VPTransformState &State) {
+ IRBuilder<> &Builder = State.Builder;
assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
"Primary induction variable must have an integer type");
-
- auto II = Legal->getInductionVars().find(IV);
- assert(II != Legal->getInductionVars().end() && "IV is not an induction");
-
- auto ID = II->second;
assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
// The value from the original loop to which we are mapping the new induction
// variable.
Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
- auto &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+ auto &DL = EntryVal->getModule()->getDataLayout();
// Generate code for the induction step. Note that induction steps are
// required to be loop-invariant
@@ -2514,7 +2458,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
if (PSE.getSE()->isSCEVable(IV->getType())) {
SCEVExpander Exp(*PSE.getSE(), DL, "induction");
return Exp.expandCodeFor(Step, Step->getType(),
- LoopVectorPreHeader->getTerminator());
+ State.CFG.VectorPreHeader->getTerminator());
}
return cast<SCEVUnknown>(Step)->getValue();
};
@@ -2530,7 +2474,8 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
? Builder.CreateSExtOrTrunc(Induction, IV->getType())
: Builder.CreateCast(Instruction::SIToFP, Induction,
IV->getType());
- ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID);
+ ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID,
+ State.CFG.PrevBB);
ScalarIV->setName("offset.idx");
}
if (Trunc) {
@@ -2548,20 +2493,19 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
for (unsigned Part = 0; Part < UF; ++Part) {
- assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
Value *StartIdx;
if (Step->getType()->isFloatingPointTy())
- StartIdx = getRuntimeVFAsFloat(Builder, Step->getType(), VF * Part);
+ StartIdx =
+ getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part);
else
- StartIdx = getRuntimeVF(Builder, Step->getType(), VF * Part);
+ StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part);
Value *EntryPart =
getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode());
State.set(Def, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
- recordVectorLoopValueForInductionCast(ID, EntryVal, EntryPart, CastDef,
- State, Part);
}
};
@@ -2572,7 +2516,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
// Now do the actual transformations, and start with creating the step value.
Value *Step = CreateStepValue(ID.getStep());
- if (VF.isZero() || VF.isScalar()) {
+ if (State.VF.isZero() || State.VF.isScalar()) {
Value *ScalarIV = CreateScalarIV(Step);
CreateSplatIV(ScalarIV, Step);
return;
@@ -2583,8 +2527,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
// least one user in the loop that is not widened.
auto NeedsScalarIV = needsScalarInduction(EntryVal);
if (!NeedsScalarIV) {
- createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, CastDef,
- State);
+ createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
return;
}
@@ -2592,14 +2535,13 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
// create the phi node, we will splat the scalar induction variable in each
// loop iteration.
if (!shouldScalarizeInstruction(EntryVal)) {
- createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, CastDef,
- State);
+ createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
Value *ScalarIV = CreateScalarIV(Step);
// Create scalar steps that can be used by instructions we will later
// scalarize. Note that the addition of the scalar steps will not increase
// the number of instructions in the loop in the common case prior to
// InstCombine. We will be trading one vector extract for each scalar step.
- buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, CastDef, State);
+ buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
return;
}
@@ -2609,7 +2551,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
Value *ScalarIV = CreateScalarIV(Step);
if (!Cost->isScalarEpilogueAllowed())
CreateSplatIV(ScalarIV, Step);
- buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, CastDef, State);
+ buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
}
Value *InnerLoopVectorizer::getStepVector(Value *Val, Value *StartIdx,
@@ -2663,10 +2605,11 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, Value *StartIdx,
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
Instruction *EntryVal,
const InductionDescriptor &ID,
- VPValue *Def, VPValue *CastDef,
+ VPValue *Def,
VPTransformState &State) {
+ IRBuilder<> &Builder = State.Builder;
// We shouldn't have to build scalar steps if we aren't vectorizing.
- assert(VF.isVector() && "VF should be greater than one");
+ assert(State.VF.isVector() && "VF should be greater than one");
// Get the value type and ensure it and the step have the same integer type.
Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
assert(ScalarIVTy == Step->getType() &&
@@ -2688,33 +2631,32 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
// iteration. If EntryVal is uniform, we only need to generate the first
// lane. Otherwise, we generate all VF values.
bool IsUniform =
- Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF);
- unsigned Lanes = IsUniform ? 1 : VF.getKnownMinValue();
+ Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), State.VF);
+ unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
// Compute the scalar steps and save the results in State.
Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
ScalarIVTy->getScalarSizeInBits());
Type *VecIVTy = nullptr;
Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
- if (!IsUniform && VF.isScalable()) {
- VecIVTy = VectorType::get(ScalarIVTy, VF);
- UnitStepVec = Builder.CreateStepVector(VectorType::get(IntStepTy, VF));
- SplatStep = Builder.CreateVectorSplat(VF, Step);
- SplatIV = Builder.CreateVectorSplat(VF, ScalarIV);
+ if (!IsUniform && State.VF.isScalable()) {
+ VecIVTy = VectorType::get(ScalarIVTy, State.VF);
+ UnitStepVec =
+ Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
+ SplatStep = Builder.CreateVectorSplat(State.VF, Step);
+ SplatIV = Builder.CreateVectorSplat(State.VF, ScalarIV);
}
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx0 = createStepForVF(Builder, IntStepTy, VF, Part);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
- if (!IsUniform && VF.isScalable()) {
- auto *SplatStartIdx = Builder.CreateVectorSplat(VF, StartIdx0);
+ if (!IsUniform && State.VF.isScalable()) {
+ auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
if (ScalarIVTy->isFloatingPointTy())
InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
State.set(Def, Add, Part);
- recordVectorLoopValueForInductionCast(ID, EntryVal, Add, CastDef, State,
- Part);
// It's useful to record the lane values too for the known minimum number
// of elements so we do those below. This improves the code quality when
// trying to extract the first element, for example.
@@ -2728,14 +2670,12 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
AddOp, StartIdx0, getSignedIntOrFpConstant(ScalarIVTy, Lane));
// The step returned by `createStepForVF` is a runtime-evaluated value
// when VF is scalable. Otherwise, it should be folded into a Constant.
- assert((VF.isScalable() || isa<Constant>(StartIdx)) &&
+ assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
"Expected StartIdx to be folded to a constant when VF is not "
"scalable");
auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
auto *Add = Builder.CreateBinOp(AddOp, ScalarIV, Mul);
State.set(Def, Add, VPIteration(Part, Lane));
- recordVectorLoopValueForInductionCast(ID, EntryVal, Add, CastDef, State,
- Part, Lane);
}
}
}
@@ -3023,21 +2963,19 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized
// instruction could feed a poison value to the base address of the widen
// load/store.
- if (State.MayGeneratePoisonRecipes.count(RepRecipe) > 0)
+ if (State.MayGeneratePoisonRecipes.contains(RepRecipe))
Cloned->dropPoisonGeneratingFlags();
State.Builder.SetInsertPoint(Builder.GetInsertBlock(),
Builder.GetInsertPoint());
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
- for (unsigned op = 0, e = RepRecipe->getNumOperands(); op != e; ++op) {
- auto *Operand = dyn_cast<Instruction>(Instr->getOperand(op));
+ for (auto &I : enumerate(RepRecipe->operands())) {
auto InputInstance = Instance;
- if (!Operand || !OrigLoop->contains(Operand) ||
- (Cost->isUniformAfterVectorization(Operand, State.VF)))
+ VPValue *Operand = I.value();
+ if (State.Plan->isUniformAfterVectorization(Operand))
InputInstance.Lane = VPLane::getFirstLane();
- auto *NewOp = State.get(RepRecipe->getOperand(op), InputInstance);
- Cloned->setOperand(op, NewOp);
+ Cloned->setOperand(I.index(), State.get(Operand, InputInstance));
}
addNewMetadata(Cloned, Instr);
@@ -3339,7 +3277,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
Value *InnerLoopVectorizer::emitTransformedIndex(
IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL,
- const InductionDescriptor &ID) const {
+ const InductionDescriptor &ID, BasicBlock *VectorHeader) const {
SCEVExpander Exp(*SE, DL, "induction");
auto Step = ID.getStep();
@@ -3382,15 +3320,15 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
};
// Get a suitable insert point for SCEV expansion. For blocks in the vector
- // loop, choose the end of the vector loop header (=LoopVectorBody), because
+ // loop, choose the end of the vector loop header (=VectorHeader), because
// the DomTree is not kept up-to-date for additional blocks generated in the
// vector loop. By using the header as insertion point, we guarantee that the
// expanded instructions dominate all their uses.
- auto GetInsertPoint = [this, &B]() {
+ auto GetInsertPoint = [this, &B, VectorHeader]() {
BasicBlock *InsertBB = B.GetInsertPoint()->getParent();
if (InsertBB != LoopVectorBody &&
- LI->getLoopFor(LoopVectorBody) == LI->getLoopFor(InsertBB))
- return LoopVectorBody->getTerminator();
+ LI->getLoopFor(VectorHeader) == LI->getLoopFor(InsertBB))
+ return VectorHeader->getTerminator();
return &*B.GetInsertPoint();
};
@@ -3538,7 +3476,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
Value *CRD = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.crd");
const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout();
- EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
+ EndValue =
+ emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody);
EndValue->setName("ind.end");
// Compute the end value for the additional bypass (if applicable).
@@ -3549,7 +3488,7 @@ void InnerLoopVectorizer::createInductionResumeValues(
CRD =
B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.crd");
EndValueFromAdditionalBypass =
- emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
+ emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody);
EndValueFromAdditionalBypass->setName("ind.end");
}
}
@@ -3623,7 +3562,7 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
if (MDNode *LID = OrigLoop->getLoopID())
L->setLoopID(LID);
- LoopVectorizeHints Hints(L, true, *ORE);
+ LoopVectorizeHints Hints(L, true, *ORE, TTI);
Hints.setAlreadyVectorized();
#ifdef EXPENSIVE_CHECKS
@@ -3780,7 +3719,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
II.getStep()->getType())
: B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
CMO->setName("cast.cmo");
- Value *Escape = emitTransformedIndex(B, CMO, PSE.getSE(), DL, II);
+ Value *Escape =
+ emitTransformedIndex(B, CMO, PSE.getSE(), DL, II, LoopVectorBody);
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
@@ -4573,7 +4513,8 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
}
}
-bool InnerLoopVectorizer::useOrderedReductions(RecurrenceDescriptor &RdxDesc) {
+bool InnerLoopVectorizer::useOrderedReductions(
+ const RecurrenceDescriptor &RdxDesc) {
return Cost->useOrderedReductions(RdxDesc);
}
@@ -4648,8 +4589,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *Idx = Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep =
- emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
+ Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(),
+ DL, II, State.CFG.PrevBB);
SclrGep->setName("next.gep");
State.set(PhiR, SclrGep, VPIteration(Part, Lane));
}
@@ -5368,13 +5309,9 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
- if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
- unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
- .getVScaleRangeArgs()
- .second;
- if (VScaleMax > 0)
- MaxVScale = VScaleMax;
- }
+ if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange))
+ MaxVScale =
+ TheFunction->getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
if (!MaxScalableVF)
@@ -5386,9 +5323,8 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
return MaxScalableVF;
}
-FixedScalableVFPair
-LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
- ElementCount UserVF) {
+FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
+ unsigned ConstTripCount, ElementCount UserVF, bool FoldTailByMasking) {
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -5475,12 +5411,14 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
FixedScalableVFPair Result(ElementCount::getFixed(1),
ElementCount::getScalable(0));
- if (auto MaxVF = getMaximizedVFForTarget(ConstTripCount, SmallestType,
- WidestType, MaxSafeFixedVF))
+ if (auto MaxVF =
+ getMaximizedVFForTarget(ConstTripCount, SmallestType, WidestType,
+ MaxSafeFixedVF, FoldTailByMasking))
Result.FixedVF = MaxVF;
- if (auto MaxVF = getMaximizedVFForTarget(ConstTripCount, SmallestType,
- WidestType, MaxSafeScalableVF))
+ if (auto MaxVF =
+ getMaximizedVFForTarget(ConstTripCount, SmallestType, WidestType,
+ MaxSafeScalableVF, FoldTailByMasking))
if (MaxVF.isScalable()) {
Result.ScalableVF = MaxVF;
LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = " << MaxVF
@@ -5513,7 +5451,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
- return computeFeasibleMaxVF(TC, UserVF);
+ return computeFeasibleMaxVF(TC, UserVF, false);
case CM_ScalarEpilogueNotAllowedUsePredicate:
LLVM_FALLTHROUGH;
case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5551,7 +5489,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
"scalar epilogue instead.\n");
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
- return computeFeasibleMaxVF(TC, UserVF);
+ return computeFeasibleMaxVF(TC, UserVF, false);
}
return FixedScalableVFPair::getNone();
}
@@ -5568,7 +5506,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
}
- FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(TC, UserVF);
+ FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(TC, UserVF, true);
// Avoid tail folding if the trip count is known to be a multiple of any VF
// we chose.
// FIXME: The condition below pessimises the case for fixed-width vectors,
@@ -5641,7 +5579,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType,
- const ElementCount &MaxSafeVF) {
+ const ElementCount &MaxSafeVF, bool FoldTailByMasking) {
bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
TypeSize WidestRegister = TTI.getRegisterBitWidth(
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
@@ -5673,14 +5611,17 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
const auto TripCountEC = ElementCount::getFixed(ConstTripCount);
if (ConstTripCount &&
ElementCount::isKnownLE(TripCountEC, MaxVectorElementCount) &&
- isPowerOf2_32(ConstTripCount)) {
- // We need to clamp the VF to be the ConstTripCount. There is no point in
- // choosing a higher viable VF as done in the loop below. If
- // MaxVectorElementCount is scalable, we only fall back on a fixed VF when
- // the TC is less than or equal to the known number of lanes.
- LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: "
- << ConstTripCount << "\n");
- return TripCountEC;
+ (!FoldTailByMasking || isPowerOf2_32(ConstTripCount))) {
+ // If loop trip count (TC) is known at compile time there is no point in
+ // choosing VF greater than TC (as done in the loop below). Select maximum
+ // power of two which doesn't exceed TC.
+ // If MaxVectorElementCount is scalable, we only fall back on a fixed VF
+ // when the TC is less than or equal to the known number of lanes.
+ auto ClampedConstTripCount = PowerOf2Floor(ConstTripCount);
+ LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
+ "exceeding the constant trip count: "
+ << ClampedConstTripCount << "\n");
+ return ElementCount::getFixed(ClampedConstTripCount);
}
ElementCount MaxVF = MaxVectorElementCount;
@@ -5758,12 +5699,11 @@ bool LoopVectorizationCostModel::isMoreProfitable(
EstimatedWidthB *= VScale.getValue();
}
- // When set to preferred, for now assume vscale may be larger than 1 (or the
- // one being tuned for), so that scalable vectorization is slightly favorable
- // over fixed-width vectorization.
- if (Hints->isScalableVectorizationPreferred())
- if (A.Width.isScalable() && !B.Width.isScalable())
- return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
+ // Assume vscale may be larger than 1 (or the value being tuned for),
+ // so that scalable vectorization is slightly favorable over fixed-width
+ // vectorization.
+ if (A.Width.isScalable() && !B.Width.isScalable())
+ return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
// To avoid the need for FP division:
// (CostA / A.Width) < (CostB / B.Width)
@@ -6068,7 +6008,8 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
if (auto *PN = dyn_cast<PHINode>(&I)) {
if (!Legal->isReductionVariable(PN))
continue;
- const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[PN];
+ const RecurrenceDescriptor &RdxDesc =
+ Legal->getReductionVars().find(PN)->second;
if (PreferInLoopReductions || useOrderedReductions(RdxDesc) ||
TTI.preferInLoopReduction(RdxDesc.getOpcode(),
RdxDesc.getRecurrenceType(),
@@ -7002,7 +6943,7 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
ReductionPhi = InLoopReductionImmediateChains[ReductionPhi];
const RecurrenceDescriptor &RdxDesc =
- Legal->getReductionVars()[cast<PHINode>(ReductionPhi)];
+ Legal->getReductionVars().find(cast<PHINode>(ReductionPhi))->second;
InstructionCost BaseCost = TTI.getArithmeticReductionCost(
RdxDesc.getOpcode(), VectorTy, RdxDesc.getFastMathFlags(), CostKind);
@@ -7079,22 +7020,41 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
match(RedOp, m_Mul(m_Instruction(Op0), m_Instruction(Op1)))) {
if (match(Op0, m_ZExtOrSExt(m_Value())) &&
Op0->getOpcode() == Op1->getOpcode() &&
- Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
!TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1)) {
bool IsUnsigned = isa<ZExtInst>(Op0);
- auto *ExtType = VectorType::get(Op0->getOperand(0)->getType(), VectorTy);
- // Matched reduce(mul(ext, ext))
- InstructionCost ExtCost =
- TTI.getCastInstrCost(Op0->getOpcode(), VectorTy, ExtType,
- TTI::CastContextHint::None, CostKind, Op0);
+ Type *Op0Ty = Op0->getOperand(0)->getType();
+ Type *Op1Ty = Op1->getOperand(0)->getType();
+ Type *LargestOpTy =
+ Op0Ty->getIntegerBitWidth() < Op1Ty->getIntegerBitWidth() ? Op1Ty
+ : Op0Ty;
+ auto *ExtType = VectorType::get(LargestOpTy, VectorTy);
+
+ // Matched reduce(mul(ext(A), ext(B))), where the two ext may be of
+ // different sizes. We take the largest type as the ext to reduce, and add
+ // the remaining cost as, for example reduce(mul(ext(ext(A)), ext(B))).
+ InstructionCost ExtCost0 = TTI.getCastInstrCost(
+ Op0->getOpcode(), VectorTy, VectorType::get(Op0Ty, VectorTy),
+ TTI::CastContextHint::None, CostKind, Op0);
+ InstructionCost ExtCost1 = TTI.getCastInstrCost(
+ Op1->getOpcode(), VectorTy, VectorType::get(Op1Ty, VectorTy),
+ TTI::CastContextHint::None, CostKind, Op1);
InstructionCost MulCost =
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
InstructionCost RedCost = TTI.getExtendedAddReductionCost(
/*IsMLA=*/true, IsUnsigned, RdxDesc.getRecurrenceType(), ExtType,
CostKind);
+ InstructionCost ExtraExtCost = 0;
+ if (Op0Ty != LargestOpTy || Op1Ty != LargestOpTy) {
+ Instruction *ExtraExtOp = (Op0Ty != LargestOpTy) ? Op0 : Op1;
+ ExtraExtCost = TTI.getCastInstrCost(
+ ExtraExtOp->getOpcode(), ExtType,
+ VectorType::get(ExtraExtOp->getOperand(0)->getType(), VectorTy),
+ TTI::CastContextHint::None, CostKind, ExtraExtOp);
+ }
- if (RedCost.isValid() && RedCost < ExtCost * 2 + MulCost + BaseCost)
+ if (RedCost.isValid() &&
+ (RedCost + ExtraExtCost) < (ExtCost0 + ExtCost1 + MulCost + BaseCost))
return I == RetI ? RedCost : 0;
} else if (!match(I, m_ZExtOrSExt(m_Value()))) {
// Matched reduce(mul())
@@ -7570,8 +7530,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
Type *CondTy = SI->getCondition()->getType();
if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
- return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
+
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
+ Pred = Cmp->getPredicate();
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, Pred,
+ CostKind, I);
}
case Instruction::ICmp:
case Instruction::FCmp: {
@@ -7581,7 +7545,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr,
- CmpInst::BAD_ICMP_PREDICATE, CostKind, I);
+ cast<CmpInst>(I)->getPredicate(), CostKind,
+ I);
}
case Instruction::Store:
case Instruction::Load: {
@@ -7762,14 +7727,14 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
// Ignore type-promoting instructions we identified during reduction
// detection.
for (auto &Reduction : Legal->getReductionVars()) {
- RecurrenceDescriptor &RedDes = Reduction.second;
+ const RecurrenceDescriptor &RedDes = Reduction.second;
const SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
// Ignore type-casting instructions we identified during induction
// detection.
for (auto &Induction : Legal->getInductionVars()) {
- InductionDescriptor &IndDes = Induction.second;
+ const InductionDescriptor &IndDes = Induction.second;
const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
@@ -7778,7 +7743,7 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
void LoopVectorizationCostModel::collectInLoopReductions() {
for (auto &Reduction : Legal->getReductionVars()) {
PHINode *Phi = Reduction.first;
- RecurrenceDescriptor &RdxDesc = Reduction.second;
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
// We don't collect reductions that are type promoted (yet).
if (RdxDesc.getRecurrenceType() != Phi->getType())
@@ -8064,18 +8029,6 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
return U == Ind || DeadInstructions.count(cast<Instruction>(U));
}))
DeadInstructions.insert(IndUpdate);
-
- // We record as "Dead" also the type-casting instructions we had identified
- // during induction analysis. We don't need any handling for them in the
- // vectorized loop because we have proven that, under a proper runtime
- // test guarding the vectorized loop, the value of the phi, and the casted
- // value of the phi, are the same. The last instruction in this casting chain
- // will get its scalar/vector/widened def from the scalar/vector/widened def
- // of the respective phi node. Any other casts in the induction def-use chain
- // have no other uses outside the phi update chain, and will be ignored.
- InductionDescriptor &IndDes = Induction.second;
- const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
- DeadInstructions.insert(Casts.begin(), Casts.end());
}
}
@@ -8461,7 +8414,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
assert(EdgeMask && "No Edge Mask found for condition");
if (BI->getSuccessor(0) != Dst)
- EdgeMask = Builder.createNot(EdgeMask);
+ EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
// The condition is 'SrcMask && EdgeMask', which is equivalent to
@@ -8470,7 +8423,8 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
// EdgeMask is poison. Using 'and' here introduces undefined behavior.
VPValue *False = Plan->getOrAddVPValue(
ConstantInt::getFalse(BI->getCondition()->getType()));
- EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
+ EdgeMask =
+ Builder.createSelect(SrcMask, EdgeMask, False, BI->getDebugLoc());
}
return EdgeMaskCache[Edge] = EdgeMask;
@@ -8492,22 +8446,24 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
if (!CM.blockNeedsPredicationForAnyReason(BB))
return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
- // Create the block in mask as the first non-phi instruction in the block.
- VPBuilder::InsertPointGuard Guard(Builder);
- auto NewInsertionPoint = Builder.getInsertBlock()->getFirstNonPhi();
- Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint);
-
// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC.
- // Start by constructing the desired canonical IV.
+ // Start by constructing the desired canonical IV in the header block.
VPValue *IV = nullptr;
if (Legal->getPrimaryInduction())
IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction());
else {
+ VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock();
auto *IVRecipe = new VPWidenCanonicalIVRecipe();
- Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint);
+ HeaderVPBB->insert(IVRecipe, HeaderVPBB->getFirstNonPhi());
IV = IVRecipe;
}
+
+ // Create the block in mask as the first non-phi instruction in the block.
+ VPBuilder::InsertPointGuard Guard(Builder);
+ auto NewInsertionPoint = Builder.getInsertBlock()->getFirstNonPhi();
+ Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint);
+
VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
bool TailFolded = !CM.isScalarEpilogueAllowed();
@@ -8534,7 +8490,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
continue;
}
- BlockMask = Builder.createOr(BlockMask, EdgeMask);
+ BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
}
return BlockMaskCache[BB] = BlockMask;
@@ -8591,14 +8547,10 @@ VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi,
ArrayRef<VPValue *> Operands) const {
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
- InductionDescriptor II = Legal->getInductionVars().lookup(Phi);
- if (II.getKind() == InductionDescriptor::IK_IntInduction ||
- II.getKind() == InductionDescriptor::IK_FpInduction) {
- assert(II.getStartValue() ==
+ if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) {
+ assert(II->getStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
- const SmallVectorImpl<Instruction *> &Casts = II.getCastInsts();
- return new VPWidenIntOrFpInductionRecipe(
- Phi, Operands[0], Casts.empty() ? nullptr : Casts.front());
+ return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II);
}
return nullptr;
@@ -8624,11 +8576,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
if (LoopVectorizationPlanner::getDecisionAndClampRange(
isOptimizableIVTruncate(I), Range)) {
- InductionDescriptor II =
- Legal->getInductionVars().lookup(cast<PHINode>(I->getOperand(0)));
+ auto *Phi = cast<PHINode>(I->getOperand(0));
+ const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return new VPWidenIntOrFpInductionRecipe(cast<PHINode>(I->getOperand(0)),
- Start, nullptr, I);
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
}
return nullptr;
}
@@ -8844,13 +8795,17 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
return VPBB;
}
LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n");
- assert(VPBB->getSuccessors().empty() &&
- "VPBB has successors when handling predicated replication.");
+
+ VPBlockBase *SingleSucc = VPBB->getSingleSuccessor();
+ assert(SingleSucc && "VPBB must have a single successor when handling "
+ "predicated replication.");
+ VPBlockUtils::disconnectBlocks(VPBB, SingleSucc);
// Record predicated instructions for above packing optimizations.
VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan);
VPBlockUtils::insertBlockAfter(Region, VPBB);
auto *RegSucc = new VPBasicBlock();
VPBlockUtils::insertBlockAfter(RegSucc, Region);
+ VPBlockUtils::connectBlocks(RegSucc, SingleSucc);
return RegSucc;
}
@@ -8910,7 +8865,8 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (Legal->isReductionVariable(Phi) || Legal->isFirstOrderRecurrence(Phi)) {
VPValue *StartV = Operands[0];
if (Legal->isReductionVariable(Phi)) {
- RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi];
+ const RecurrenceDescriptor &RdxDesc =
+ Legal->getReductionVars().find(Phi)->second;
assert(RdxDesc.getRecurrenceStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV,
@@ -9031,7 +8987,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
for (auto &Reduction : CM.getInLoopReductionChains()) {
PHINode *Phi = Reduction.first;
- RecurKind Kind = Legal->getReductionVars()[Phi].getRecurrenceKind();
+ RecurKind Kind =
+ Legal->getReductionVars().find(Phi)->second.getRecurrenceKind();
const SmallVector<Instruction *, 4> &ReductionOperations = Reduction.second;
RecipeBuilder.recordRecipeOf(Phi);
@@ -9069,30 +9026,25 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- auto Plan = std::make_unique<VPlan>();
+ // Create initial VPlan skeleton, with separate header and latch blocks.
+ VPBasicBlock *HeaderVPBB = new VPBasicBlock();
+ VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
+ VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
+ auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
+ auto Plan = std::make_unique<VPlan>(TopRegion);
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);
- VPBasicBlock *VPBB = nullptr;
- VPBasicBlock *HeaderVPBB = nullptr;
+ VPBasicBlock *VPBB = HeaderVPBB;
SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
- auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
- if (VPBB)
- VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
- else {
- auto *TopRegion = new VPRegionBlock("vector loop");
- TopRegion->setEntry(FirstVPBBForBB);
- Plan->setEntry(TopRegion);
- HeaderVPBB = FirstVPBBForBB;
- }
- VPBB = FirstVPBBForBB;
+ VPBB->setName(BB->getName());
Builder.setInsertPoint(VPBB);
// Introduce each ingredient into VPlan.
@@ -9159,13 +9111,21 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
: "");
}
}
+
+ VPBlockUtils::insertBlockAfter(new VPBasicBlock(), VPBB);
+ VPBB = cast<VPBasicBlock>(VPBB->getSingleSuccessor());
}
+ // Fold the last, empty block into its predecessor.
+ VPBB = VPBlockUtils::tryToMergeBlockIntoPredecessor(VPBB);
+ assert(VPBB && "expected to fold last (empty) block");
+ // After here, VPBB should not be used.
+ VPBB = nullptr;
+
assert(isa<VPRegionBlock>(Plan->getEntry()) &&
!Plan->getEntry()->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
- cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB);
RecipeBuilder.fixHeaderPhis();
// ---------------------------------------------------------------------------
@@ -9231,18 +9191,19 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
- if (VPBB == SplitPred)
- VPBB = SplitBlock;
}
}
+ VPlanTransforms::removeRedundantInductionCasts(*Plan);
+
// Now that sink-after is done, move induction recipes for optimized truncates
// to the phi section of the header block.
for (VPWidenIntOrFpInductionRecipe *Ind : InductionsToMove)
Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
// Adjust the recipes for any inloop reductions.
- adjustRecipesForReductions(VPBB, Plan, RecipeBuilder, Range.Start);
+ adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExit()), Plan,
+ RecipeBuilder, Range.Start);
// Introduce a recipe to combine the incoming and previous values of a
// first-order recurrence.
@@ -9322,6 +9283,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RSO.flush();
Plan->setName(PlanName);
+ // Fold Exit block into its predecessor if possible.
+ // TODO: Fold block earlier once all VPlan transforms properly maintain a
+ // VPBasicBlock as exit.
+ VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExit());
+
assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
}
@@ -9355,9 +9321,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
}
SmallPtrSet<Instruction *, 1> DeadInstructions;
- VPlanTransforms::VPInstructionsToVPRecipes(OrigLoop, Plan,
- Legal->getInductionVars(),
- DeadInstructions, *PSE.getSE());
+ VPlanTransforms::VPInstructionsToVPRecipes(
+ OrigLoop, Plan,
+ [this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); },
+ DeadInstructions, *PSE.getSE());
return Plan;
}
@@ -9371,7 +9338,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
ElementCount MinVF) {
for (auto &Reduction : CM.getInLoopReductionChains()) {
PHINode *Phi = Reduction.first;
- RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi];
+ const RecurrenceDescriptor &RdxDesc =
+ Legal->getReductionVars().find(Phi)->second;
const SmallVector<Instruction *, 4> &ReductionOperations = Reduction.second;
if (MinVF.isScalar() && !CM.useOrderedReductions(RdxDesc))
@@ -9565,7 +9533,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
// exact, etc.). The control flow has been linearized and the
// instruction is no longer guarded by the predicate, which could make
// the flag properties to no longer hold.
- if (State.MayGeneratePoisonRecipes.count(this) > 0)
+ if (State.MayGeneratePoisonRecipes.contains(this))
VecOp->dropPoisonGeneratingFlags();
}
@@ -9714,9 +9682,9 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Int or FP induction being replicated.");
- State.ILV->widenIntOrFpInduction(IV, getStartValue()->getLiveInIRValue(),
- getTruncInst(), getVPValue(0),
- getCastValue(), State);
+ State.ILV->widenIntOrFpInduction(IV, getInductionDescriptor(),
+ getStartValue()->getLiveInIRValue(),
+ getTruncInst(), getVPValue(0), State);
}
void VPWidenPHIRecipe::execute(VPTransformState &State) {
@@ -10293,7 +10261,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
<< L->getHeader()->getParent()->getName() << "\" from "
<< DebugLocStr << "\n");
- LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE);
+ LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
LLVM_DEBUG(
dbgs() << "LV: Loop hints:"
@@ -10747,8 +10715,17 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
PA.preserve<LoopAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
}
- if (!Result.MadeCFGChange)
+
+ if (Result.MadeCFGChange) {
+ // Making CFG changes likely means a loop got vectorized. Indicate that
+ // extra simplification passes should be run.
+  // TODO: MadeCFGChanges is not a perfect proxy. Extra passes should only
+ // be run if runtime checks have been added.
+ AM.getResult<ShouldRunExtraVectorPasses>(F);
+ PA.preserve<ShouldRunExtraVectorPasses>();
+ } else {
PA.preserveSet<CFGAnalyses>();
+ }
return PA;
}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 95061e9053fa..37ae13666f7a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -631,27 +631,26 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
/// after: 6 3 5 4 7 2 1 0
static void fixupOrderingIndices(SmallVectorImpl<unsigned> &Order) {
const unsigned Sz = Order.size();
- SmallBitVector UsedIndices(Sz);
- SmallVector<int> MaskedIndices;
+ SmallBitVector UnusedIndices(Sz, /*t=*/true);
+ SmallBitVector MaskedIndices(Sz);
for (unsigned I = 0; I < Sz; ++I) {
if (Order[I] < Sz)
- UsedIndices.set(Order[I]);
+ UnusedIndices.reset(Order[I]);
else
- MaskedIndices.push_back(I);
+ MaskedIndices.set(I);
}
- if (MaskedIndices.empty())
+ if (MaskedIndices.none())
return;
- SmallVector<int> AvailableIndices(MaskedIndices.size());
- unsigned Cnt = 0;
- int Idx = UsedIndices.find_first();
- do {
- AvailableIndices[Cnt] = Idx;
- Idx = UsedIndices.find_next(Idx);
- ++Cnt;
- } while (Idx > 0);
- assert(Cnt == MaskedIndices.size() && "Non-synced masked/available indices.");
- for (int I = 0, E = MaskedIndices.size(); I < E; ++I)
- Order[MaskedIndices[I]] = AvailableIndices[I];
+ assert(UnusedIndices.count() == MaskedIndices.count() &&
+ "Non-synced masked/available indices.");
+ int Idx = UnusedIndices.find_first();
+ int MIdx = MaskedIndices.find_first();
+ while (MIdx >= 0) {
+ assert(Idx >= 0 && "Indices must be synced.");
+ Order[MIdx] = Idx;
+ Idx = UnusedIndices.find_next(Idx);
+ MIdx = MaskedIndices.find_next(MIdx);
+ }
}
namespace llvm {
@@ -812,6 +811,13 @@ public:
/// ExtractElement, ExtractValue), which can be part of the graph.
Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
+ /// Gets reordering data for the given tree entry. If the entry is vectorized
+ /// - just return ReorderIndices, otherwise check if the scalars can be
+ /// reordered and return the most optimal order.
+ /// \param TopToBottom If true, include the order of vectorized stores and
+ /// insertelement nodes, otherwise skip them.
+ Optional<OrdersType> getReorderingData(const TreeEntry &TE, bool TopToBottom);
+
/// Reorders the current graph to the most profitable order starting from the
/// root node to the leaf nodes. The best order is chosen only from the nodes
/// of the same size (vectorization factor). Smaller nodes are considered
@@ -1010,18 +1016,25 @@ public:
std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
}
- // The hard-coded scores listed here are not very important. When computing
- // the scores of matching one sub-tree with another, we are basically
- // counting the number of values that are matching. So even if all scores
- // are set to 1, we would still get a decent matching result.
+ // The hard-coded scores listed here are not very important, though it shall
+ // be higher for better matches to improve the resulting cost. When
+ // computing the scores of matching one sub-tree with another, we are
+ // basically counting the number of values that are matching. So even if all
+ // scores are set to 1, we would still get a decent matching result.
// However, sometimes we have to break ties. For example we may have to
// choose between matching loads vs matching opcodes. This is what these
- // scores are helping us with: they provide the order of preference.
+ // scores are helping us with: they provide the order of preference. Also,
+ // this is important if the scalar is externally used or used in another
+ // tree entry node in the different lane.
/// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
- static const int ScoreConsecutiveLoads = 3;
+ static const int ScoreConsecutiveLoads = 4;
+ /// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]).
+ static const int ScoreReversedLoads = 3;
/// ExtractElementInst from same vector and consecutive indexes.
- static const int ScoreConsecutiveExtracts = 3;
+ static const int ScoreConsecutiveExtracts = 4;
+ /// ExtractElementInst from same vector and reversed indices.
+ static const int ScoreReversedExtracts = 3;
/// Constants.
static const int ScoreConstants = 2;
/// Instructions with the same opcode.
@@ -1041,7 +1054,10 @@ public:
/// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
- ScalarEvolution &SE) {
+ ScalarEvolution &SE, int NumLanes) {
+ if (V1 == V2)
+ return VLOperands::ScoreSplat;
+
auto *LI1 = dyn_cast<LoadInst>(V1);
auto *LI2 = dyn_cast<LoadInst>(V2);
if (LI1 && LI2) {
@@ -1051,8 +1067,17 @@ public:
Optional<int> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
- return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
- : VLOperands::ScoreFail;
+ if (!Dist)
+ return VLOperands::ScoreFail;
+ // The distance is too large - still may be profitable to use masked
+ // loads/gathers.
+ if (std::abs(*Dist) > NumLanes / 2)
+ return VLOperands::ScoreAltOpcodes;
+ // This still will detect consecutive loads, but we might have "holes"
+ // in some cases. It is ok for non-power-2 vectorization and may produce
+ // better results. It should not affect current vectorization.
+ return (*Dist > 0) ? VLOperands::ScoreConsecutiveLoads
+ : VLOperands::ScoreReversedLoads;
}
auto *C1 = dyn_cast<Constant>(V1);
@@ -1062,18 +1087,41 @@ public:
// Extracts from consecutive indexes of the same vector better score as
// the extracts could be optimized away.
- Value *EV;
- ConstantInt *Ex1Idx, *Ex2Idx;
- if (match(V1, m_ExtractElt(m_Value(EV), m_ConstantInt(Ex1Idx))) &&
- match(V2, m_ExtractElt(m_Deferred(EV), m_ConstantInt(Ex2Idx))) &&
- Ex1Idx->getZExtValue() + 1 == Ex2Idx->getZExtValue())
- return VLOperands::ScoreConsecutiveExtracts;
+ Value *EV1;
+ ConstantInt *Ex1Idx;
+ if (match(V1, m_ExtractElt(m_Value(EV1), m_ConstantInt(Ex1Idx)))) {
+ // Undefs are always profitable for extractelements.
+ if (isa<UndefValue>(V2))
+ return VLOperands::ScoreConsecutiveExtracts;
+ Value *EV2 = nullptr;
+ ConstantInt *Ex2Idx = nullptr;
+ if (match(V2,
+ m_ExtractElt(m_Value(EV2), m_CombineOr(m_ConstantInt(Ex2Idx),
+ m_Undef())))) {
+ // Undefs are always profitable for extractelements.
+ if (!Ex2Idx)
+ return VLOperands::ScoreConsecutiveExtracts;
+ if (isUndefVector(EV2) && EV2->getType() == EV1->getType())
+ return VLOperands::ScoreConsecutiveExtracts;
+ if (EV2 == EV1) {
+ int Idx1 = Ex1Idx->getZExtValue();
+ int Idx2 = Ex2Idx->getZExtValue();
+ int Dist = Idx2 - Idx1;
+ // The distance is too large - still may be profitable to use
+ // shuffles.
+ if (std::abs(Dist) > NumLanes / 2)
+ return VLOperands::ScoreAltOpcodes;
+ return (Dist > 0) ? VLOperands::ScoreConsecutiveExtracts
+ : VLOperands::ScoreReversedExtracts;
+ }
+ }
+ }
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (I1 && I2) {
- if (I1 == I2)
- return VLOperands::ScoreSplat;
+ if (I1->getParent() != I2->getParent())
+ return VLOperands::ScoreFail;
InstructionsState S = getSameOpcode({I1, I2});
// Note: Only consider instructions with <= 2 operands to avoid
// complexity explosion.
@@ -1088,11 +1136,13 @@ public:
return VLOperands::ScoreFail;
}
- /// Holds the values and their lane that are taking part in the look-ahead
+ /// Holds the values and their lanes that are taking part in the look-ahead
/// score calculation. This is used in the external uses cost calculation.
- SmallDenseMap<Value *, int> InLookAheadValues;
+ /// Need to hold all the lanes in case of splat/broadcast at least to
+ /// correctly check for the use in the different lane.
+ SmallDenseMap<Value *, SmallSet<int, 4>> InLookAheadValues;
- /// \Returns the additinal cost due to uses of \p LHS and \p RHS that are
+ /// \returns the additional cost due to uses of \p LHS and \p RHS that are
/// either external to the vectorized code, or require shuffling.
int getExternalUsesCost(const std::pair<Value *, int> &LHS,
const std::pair<Value *, int> &RHS) {
@@ -1116,22 +1166,30 @@ public:
for (User *U : V->users()) {
if (const TreeEntry *UserTE = R.getTreeEntry(U)) {
// The user is in the VectorizableTree. Check if we need to insert.
- auto It = llvm::find(UserTE->Scalars, U);
- assert(It != UserTE->Scalars.end() && "U is in UserTE");
- int UserLn = std::distance(UserTE->Scalars.begin(), It);
+ int UserLn = UserTE->findLaneForValue(U);
assert(UserLn >= 0 && "Bad lane");
- if (UserLn != Ln)
+          // If the values are different, check just the lane of the current
+          // value. If the values are the same, need to add UserInDiffLaneCost
+          // only if UserLn does not match both lane numbers.
+ if ((LHS.first != RHS.first && UserLn != Ln) ||
+ (LHS.first == RHS.first && UserLn != LHS.second &&
+ UserLn != RHS.second)) {
Cost += UserInDiffLaneCost;
+ break;
+ }
} else {
// Check if the user is in the look-ahead code.
auto It2 = InLookAheadValues.find(U);
if (It2 != InLookAheadValues.end()) {
// The user is in the look-ahead code. Check the lane.
- if (It2->second != Ln)
+ if (!It2->getSecond().contains(Ln)) {
Cost += UserInDiffLaneCost;
+ break;
+ }
} else {
// The user is neither in SLP tree nor in the look-ahead code.
Cost += ExternalUseCost;
+ break;
}
}
// Limit the number of visited uses to cap compilation time.
@@ -1170,32 +1228,36 @@ public:
Value *V1 = LHS.first;
Value *V2 = RHS.first;
// Get the shallow score of V1 and V2.
- int ShallowScoreAtThisLevel =
- std::max((int)ScoreFail, getShallowScore(V1, V2, DL, SE) -
- getExternalUsesCost(LHS, RHS));
+ int ShallowScoreAtThisLevel = std::max(
+ (int)ScoreFail, getShallowScore(V1, V2, DL, SE, getNumLanes()) -
+ getExternalUsesCost(LHS, RHS));
int Lane1 = LHS.second;
int Lane2 = RHS.second;
// If reached MaxLevel,
// or if V1 and V2 are not instructions,
// or if they are SPLAT,
- // or if they are not consecutive, early return the current cost.
+ // or if they are not consecutive,
+ // or if profitable to vectorize loads or extractelements, early return
+ // the current cost.
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
- (isa<LoadInst>(I1) && isa<LoadInst>(I2) && ShallowScoreAtThisLevel))
+ (((isa<LoadInst>(I1) && isa<LoadInst>(I2)) ||
+ (isa<ExtractElementInst>(I1) && isa<ExtractElementInst>(I2))) &&
+ ShallowScoreAtThisLevel))
return ShallowScoreAtThisLevel;
assert(I1 && I2 && "Should have early exited.");
// Keep track of in-tree values for determining the external-use cost.
- InLookAheadValues[V1] = Lane1;
- InLookAheadValues[V2] = Lane2;
+ InLookAheadValues[V1].insert(Lane1);
+ InLookAheadValues[V2].insert(Lane2);
// Contains the I2 operand indexes that got matched with I1 operands.
SmallSet<unsigned, 4> Op2Used;
- // Recursion towards the operands of I1 and I2. We are trying all possbile
+ // Recursion towards the operands of I1 and I2. We are trying all possible
// operand pairs, and keeping track of the best score.
for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands();
OpIdx1 != NumOperands1; ++OpIdx1) {
@@ -1319,27 +1381,79 @@ public:
return None;
}
- /// Helper for reorderOperandVecs. \Returns the lane that we should start
- /// reordering from. This is the one which has the least number of operands
- /// that can freely move about.
+ /// Helper for reorderOperandVecs.
+ /// \returns the lane that we should start reordering from. This is the one
+ /// which has the least number of operands that can freely move about or
+ /// less profitable because it already has the most optimal set of operands.
unsigned getBestLaneToStartReordering() const {
- unsigned BestLane = 0;
unsigned Min = UINT_MAX;
- for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
- ++Lane) {
- unsigned NumFreeOps = getMaxNumOperandsThatCanBeReordered(Lane);
- if (NumFreeOps < Min) {
- Min = NumFreeOps;
- BestLane = Lane;
+ unsigned SameOpNumber = 0;
+ // std::pair<unsigned, unsigned> is used to implement a simple voting
+ // algorithm and choose the lane with the least number of operands that
+ // can freely move about or less profitable because it already has the
+ // most optimal set of operands. The first unsigned is a counter for
+ // voting, the second unsigned is the counter of lanes with instructions
+ // with same/alternate opcodes and same parent basic block.
+ MapVector<unsigned, std::pair<unsigned, unsigned>> HashMap;
+ // Try to be closer to the original results, if we have multiple lanes
+ // with same cost. If 2 lanes have the same cost, use the one with the
+ // lowest index.
+ for (int I = getNumLanes(); I > 0; --I) {
+ unsigned Lane = I - 1;
+ OperandsOrderData NumFreeOpsHash =
+ getMaxNumOperandsThatCanBeReordered(Lane);
+ // Compare the number of operands that can move and choose the one with
+ // the least number.
+ if (NumFreeOpsHash.NumOfAPOs < Min) {
+ Min = NumFreeOpsHash.NumOfAPOs;
+ SameOpNumber = NumFreeOpsHash.NumOpsWithSameOpcodeParent;
+ HashMap.clear();
+ HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
+ } else if (NumFreeOpsHash.NumOfAPOs == Min &&
+ NumFreeOpsHash.NumOpsWithSameOpcodeParent < SameOpNumber) {
+ // Select the most optimal lane in terms of number of operands that
+ // should be moved around.
+ SameOpNumber = NumFreeOpsHash.NumOpsWithSameOpcodeParent;
+ HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
+ } else if (NumFreeOpsHash.NumOfAPOs == Min &&
+ NumFreeOpsHash.NumOpsWithSameOpcodeParent == SameOpNumber) {
+ ++HashMap[NumFreeOpsHash.Hash].first;
+ }
+ }
+ // Select the lane with the minimum counter.
+ unsigned BestLane = 0;
+ unsigned CntMin = UINT_MAX;
+ for (const auto &Data : reverse(HashMap)) {
+ if (Data.second.first < CntMin) {
+ CntMin = Data.second.first;
+ BestLane = Data.second.second;
}
}
return BestLane;
}
- /// \Returns the maximum number of operands that are allowed to be reordered
- /// for \p Lane. This is used as a heuristic for selecting the first lane to
- /// start operand reordering.
- unsigned getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
+ /// Data structure that helps to reorder operands.
+ struct OperandsOrderData {
+ /// The best number of operands with the same APOs, which can be
+ /// reordered.
+ unsigned NumOfAPOs = UINT_MAX;
+ /// Number of operands with the same/alternate instruction opcode and
+ /// parent.
+ unsigned NumOpsWithSameOpcodeParent = 0;
+ /// Hash for the actual operands ordering.
+ /// Used to count operands, actually their position id and opcode
+ /// value. It is used in the voting mechanism to find the lane with the
+ /// least number of operands that can freely move about or less profitable
+ /// because it already has the most optimal set of operands. Can be
+ /// replaced with SmallVector<unsigned> instead but hash code is faster
+ /// and requires less memory.
+ unsigned Hash = 0;
+ };
+ /// \returns the maximum number of operands that are allowed to be reordered
+    /// for \p Lane and the number of compatible instructions (with the same
+ /// parent/opcode). This is used as a heuristic for selecting the first lane
+ /// to start operand reordering.
+ OperandsOrderData getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
unsigned CntTrue = 0;
unsigned NumOperands = getNumOperands();
// Operands with the same APO can be reordered. We therefore need to count
@@ -1348,11 +1462,45 @@ public:
// a map. Instead we can simply count the number of operands that
// correspond to one of them (in this case the 'true' APO), and calculate
// the other by subtracting it from the total number of operands.
- for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
- if (getData(OpIdx, Lane).APO)
+ // Operands with the same instruction opcode and parent are more
+ // profitable since we don't need to move them in many cases, with a high
+ // probability such lane already can be vectorized effectively.
+ bool AllUndefs = true;
+ unsigned NumOpsWithSameOpcodeParent = 0;
+ Instruction *OpcodeI = nullptr;
+ BasicBlock *Parent = nullptr;
+ unsigned Hash = 0;
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ const OperandData &OpData = getData(OpIdx, Lane);
+ if (OpData.APO)
++CntTrue;
- unsigned CntFalse = NumOperands - CntTrue;
- return std::max(CntTrue, CntFalse);
+ // Use Boyer-Moore majority voting for finding the majority opcode and
+ // the number of times it occurs.
+ if (auto *I = dyn_cast<Instruction>(OpData.V)) {
+ if (!OpcodeI || !getSameOpcode({OpcodeI, I}).getOpcode() ||
+ I->getParent() != Parent) {
+ if (NumOpsWithSameOpcodeParent == 0) {
+ NumOpsWithSameOpcodeParent = 1;
+ OpcodeI = I;
+ Parent = I->getParent();
+ } else {
+ --NumOpsWithSameOpcodeParent;
+ }
+ } else {
+ ++NumOpsWithSameOpcodeParent;
+ }
+ }
+ Hash = hash_combine(
+ Hash, hash_value((OpIdx + 1) * (OpData.V->getValueID() + 1)));
+ AllUndefs = AllUndefs && isa<UndefValue>(OpData.V);
+ }
+ if (AllUndefs)
+ return {};
+ OperandsOrderData Data;
+ Data.NumOfAPOs = std::max(CntTrue, NumOperands - CntTrue);
+ Data.NumOpsWithSameOpcodeParent = NumOpsWithSameOpcodeParent;
+ Data.Hash = Hash;
+ return Data;
}
/// Go through the instructions in VL and append their operands.
@@ -1500,11 +1648,37 @@ public:
ReorderingModes[OpIdx] = ReorderingMode::Failed;
}
+ // Check that we don't have same operands. No need to reorder if operands
+ // are just perfect diamond or shuffled diamond match. Do not do it only
+ // for possible broadcasts or non-power of 2 number of scalars (just for
+ // now).
+ auto &&SkipReordering = [this]() {
+ SmallPtrSet<Value *, 4> UniqueValues;
+ ArrayRef<OperandData> Op0 = OpsVec.front();
+ for (const OperandData &Data : Op0)
+ UniqueValues.insert(Data.V);
+ for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
+ if (any_of(Op, [&UniqueValues](const OperandData &Data) {
+ return !UniqueValues.contains(Data.V);
+ }))
+ return false;
+ }
+ // TODO: Check if we can remove a check for non-power-2 number of
+ // scalars after full support of non-power-2 vectorization.
+ return UniqueValues.size() != 2 && isPowerOf2_32(UniqueValues.size());
+ };
+
// If the initial strategy fails for any of the operand indexes, then we
// perform reordering again in a second pass. This helps avoid assigning
// high priority to the failed strategy, and should improve reordering for
// the non-failed operand indexes.
for (int Pass = 0; Pass != 2; ++Pass) {
+      // Check if no need to reorder operands since they are perfect or
+      // shuffled diamond match.
+      // Need to do it to avoid extra external use cost counting for
+ // shuffled matches, which may cause regressions.
+ if (SkipReordering())
+ break;
// Skip the second pass if the first pass did not fail.
bool StrategyFailed = false;
// Mark all operand data as free to use.
@@ -1792,9 +1966,10 @@ private:
if (Operands.size() < OpIdx + 1)
Operands.resize(OpIdx + 1);
assert(Operands[OpIdx].empty() && "Already resized?");
- Operands[OpIdx].resize(Scalars.size());
- for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
- Operands[OpIdx][Lane] = OpVL[Lane];
+ assert(OpVL.size() <= Scalars.size() &&
+ "Number of operands is greater than the number of scalars.");
+ Operands[OpIdx].resize(OpVL.size());
+ copy(OpVL, Operands[OpIdx].begin());
}
/// Set the operands of this bundle in their original order.
@@ -1944,7 +2119,7 @@ private:
if (ReuseShuffleIndices.empty())
dbgs() << "Empty";
else
- for (unsigned ReuseIdx : ReuseShuffleIndices)
+ for (int ReuseIdx : ReuseShuffleIndices)
dbgs() << ReuseIdx << ", ";
dbgs() << "\n";
dbgs() << "ReorderIndices: ";
@@ -2819,6 +2994,50 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
return None;
}
+Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
+ bool TopToBottom) {
+ // No need to reorder if need to shuffle reuses, still need to shuffle the
+ // node.
+ if (!TE.ReuseShuffleIndices.empty())
+ return None;
+ if (TE.State == TreeEntry::Vectorize &&
+ (isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE.getMainOp()) ||
+ (TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
+ !TE.isAltShuffle())
+ return TE.ReorderIndices;
+ if (TE.State == TreeEntry::NeedToGather) {
+ // TODO: add analysis of other gather nodes with extractelement
+ // instructions and other values/instructions, not only undefs.
+ if (((TE.getOpcode() == Instruction::ExtractElement &&
+ !TE.isAltShuffle()) ||
+ (all_of(TE.Scalars,
+ [](Value *V) {
+ return isa<UndefValue, ExtractElementInst>(V);
+ }) &&
+ any_of(TE.Scalars,
+ [](Value *V) { return isa<ExtractElementInst>(V); }))) &&
+ all_of(TE.Scalars,
+ [](Value *V) {
+ auto *EE = dyn_cast<ExtractElementInst>(V);
+ return !EE || isa<FixedVectorType>(EE->getVectorOperandType());
+ }) &&
+ allSameType(TE.Scalars)) {
+ // Check that gather of extractelements can be represented as
+ // just a shuffle of a single vector.
+ OrdersType CurrentOrder;
+ bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder);
+ if (Reuse || !CurrentOrder.empty()) {
+ if (!CurrentOrder.empty())
+ fixupOrderingIndices(CurrentOrder);
+ return CurrentOrder;
+ }
+ }
+ if (Optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
+ return CurrentOrder;
+ }
+ return None;
+}
+
void BoUpSLP::reorderTopToBottom() {
// Maps VF to the graph nodes.
DenseMap<unsigned, SmallPtrSet<TreeEntry *, 4>> VFToOrderedEntries;
@@ -2826,42 +3045,15 @@ void BoUpSLP::reorderTopToBottom() {
// their ordering.
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
// Find all reorderable nodes with the given VF.
- // Currently the are vectorized loads,extracts + some gathering of extracts.
+  // Currently they are vectorized stores,loads,extracts + some gathering of
+  // extracts.
for_each(VectorizableTree, [this, &VFToOrderedEntries, &GathersToOrders](
const std::unique_ptr<TreeEntry> &TE) {
- // No need to reorder if need to shuffle reuses, still need to shuffle the
- // node.
- if (!TE->ReuseShuffleIndices.empty())
- return;
- if (TE->State == TreeEntry::Vectorize &&
- isa<LoadInst, ExtractElementInst, ExtractValueInst, StoreInst,
- InsertElementInst>(TE->getMainOp()) &&
- !TE->isAltShuffle()) {
+ if (Optional<OrdersType> CurrentOrder =
+ getReorderingData(*TE.get(), /*TopToBottom=*/true)) {
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
- return;
- }
- if (TE->State == TreeEntry::NeedToGather) {
- if (TE->getOpcode() == Instruction::ExtractElement &&
- !TE->isAltShuffle() &&
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
- ->getVectorOperandType()) &&
- allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
- // Check that gather of extractelements can be represented as
- // just a shuffle of a single vector.
- OrdersType CurrentOrder;
- bool Reuse =
- canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
- if (Reuse || !CurrentOrder.empty()) {
- VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
- GathersToOrders.try_emplace(TE.get(), CurrentOrder);
- return;
- }
- }
- if (Optional<OrdersType> CurrentOrder =
- findReusedOrderedScalars(*TE.get())) {
- VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ if (TE->State != TreeEntry::Vectorize)
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
- }
}
});
@@ -2993,44 +3185,11 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
const std::unique_ptr<TreeEntry> &TE) {
if (TE->State != TreeEntry::Vectorize)
NonVectorized.push_back(TE.get());
- // No need to reorder if need to shuffle reuses, still need to shuffle the
- // node.
- if (!TE->ReuseShuffleIndices.empty())
- return;
- if (TE->State == TreeEntry::Vectorize &&
- isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp()) &&
- !TE->isAltShuffle()) {
+ if (Optional<OrdersType> CurrentOrder =
+ getReorderingData(*TE.get(), /*TopToBottom=*/false)) {
OrderedEntries.insert(TE.get());
- return;
- }
- if (TE->State == TreeEntry::NeedToGather) {
- if (TE->getOpcode() == Instruction::ExtractElement &&
- !TE->isAltShuffle() &&
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
- ->getVectorOperandType()) &&
- allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
- // Check that gather of extractelements can be represented as
- // just a shuffle of a single vector with a single user only.
- OrdersType CurrentOrder;
- bool Reuse =
- canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
- if ((Reuse || !CurrentOrder.empty()) &&
- !any_of(VectorizableTree,
- [&TE](const std::unique_ptr<TreeEntry> &Entry) {
- return Entry->State == TreeEntry::NeedToGather &&
- Entry.get() != TE.get() &&
- Entry->isSame(TE->Scalars);
- })) {
- OrderedEntries.insert(TE.get());
- GathersToOrders.try_emplace(TE.get(), CurrentOrder);
- return;
- }
- }
- if (Optional<OrdersType> CurrentOrder =
- findReusedOrderedScalars(*TE.get())) {
- OrderedEntries.insert(TE.get());
+ if (TE->State != TreeEntry::Vectorize)
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
- }
}
});
@@ -3392,9 +3551,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that every instruction appears once in this bundle.
DenseMap<Value *, unsigned> UniquePositions;
for (Value *V : VL) {
+ if (isConstant(V)) {
+ ReuseShuffleIndicies.emplace_back(
+ isa<UndefValue>(V) ? UndefMaskElem : UniqueValues.size());
+ UniqueValues.emplace_back(V);
+ continue;
+ }
auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
- ReuseShuffleIndicies.emplace_back(isa<UndefValue>(V) ? -1
- : Res.first->second);
+ ReuseShuffleIndicies.emplace_back(Res.first->second);
if (Res.second)
UniqueValues.emplace_back(V);
}
@@ -3404,6 +3568,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
} else {
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
if (NumUniqueScalarValues <= 1 ||
+ (UniquePositions.size() == 1 && all_of(UniqueValues,
+ [](Value *V) {
+ return isa<UndefValue>(V) ||
+ !isConstant(V);
+ })) ||
!llvm::isPowerOf2_32(NumUniqueScalarValues)) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
@@ -3508,11 +3677,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- // If any of the scalars is marked as a value that needs to stay scalar, then
- // we need to gather the scalars.
// The reduction nodes (stored in UserIgnoreList) also should stay scalar.
for (Value *V : VL) {
- if (MustGather.count(V) || is_contained(UserIgnoreList, V)) {
+ if (is_contained(UserIgnoreList, V)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
@@ -4219,10 +4386,17 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
SmallVectorImpl<unsigned> &CurrentOrder) const {
- Instruction *E0 = cast<Instruction>(OpValue);
- assert(E0->getOpcode() == Instruction::ExtractElement ||
- E0->getOpcode() == Instruction::ExtractValue);
- assert(E0->getOpcode() == getSameOpcode(VL).getOpcode() && "Invalid opcode");
+ const auto *It = find_if(VL, [](Value *V) {
+ return isa<ExtractElementInst, ExtractValueInst>(V);
+ });
+ assert(It != VL.end() && "Expected at least one extract instruction.");
+ auto *E0 = cast<Instruction>(*It);
+ assert(all_of(VL,
+ [](Value *V) {
+ return isa<UndefValue, ExtractElementInst, ExtractValueInst>(
+ V);
+ }) &&
+ "Invalid opcode");
// Check if all of the extracts come from the same vector and from the
// correct offset.
Value *Vec = E0->getOperand(0);
@@ -4255,23 +4429,28 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
// Also, later we can check that all the indices are used and we have a
// consecutive access in the extract instructions, by checking that no
// element of CurrentOrder still has value E + 1.
- CurrentOrder.assign(E, E + 1);
+ CurrentOrder.assign(E, E);
unsigned I = 0;
for (; I < E; ++I) {
- auto *Inst = cast<Instruction>(VL[I]);
+ auto *Inst = dyn_cast<Instruction>(VL[I]);
+ if (!Inst)
+ continue;
if (Inst->getOperand(0) != Vec)
break;
+ if (auto *EE = dyn_cast<ExtractElementInst>(Inst))
+ if (isa<UndefValue>(EE->getIndexOperand()))
+ continue;
Optional<unsigned> Idx = getExtractIndex(Inst);
if (!Idx)
break;
const unsigned ExtIdx = *Idx;
if (ExtIdx != I) {
- if (ExtIdx >= E || CurrentOrder[ExtIdx] != E + 1)
+ if (ExtIdx >= E || CurrentOrder[ExtIdx] != E)
break;
ShouldKeepOrder = false;
CurrentOrder[ExtIdx] = I;
} else {
- if (CurrentOrder[I] != E + 1)
+ if (CurrentOrder[I] != E)
break;
CurrentOrder[I] = I;
}
@@ -4287,8 +4466,8 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
bool BoUpSLP::areAllUsersVectorized(Instruction *I,
ArrayRef<Value *> VectorizedVals) const {
return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
- llvm::all_of(I->users(), [this](User *U) {
- return ScalarToTreeEntry.count(U) > 0;
+ all_of(I->users(), [this](User *U) {
+ return ScalarToTreeEntry.count(U) > 0 || MustGather.contains(U);
});
}
@@ -4348,6 +4527,10 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
for (auto *V : VL) {
++Idx;
+ // Need to exclude undefs from analysis.
+ if (isa<UndefValue>(V) || Mask[Idx] == UndefMaskElem)
+ continue;
+
// Reached the start of a new vector registers.
if (Idx % EltsPerVector == 0) {
AllConsecutive = true;
@@ -4357,9 +4540,11 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
// Check all extracts for a vector register on the target directly
// extract values in order.
unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
- unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
- AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
- CurrentIdx % EltsPerVector == Idx % EltsPerVector;
+ if (!isa<UndefValue>(VL[Idx - 1]) && Mask[Idx - 1] != UndefMaskElem) {
+ unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
+ AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
+ CurrentIdx % EltsPerVector == Idx % EltsPerVector;
+ }
if (AllConsecutive)
continue;
@@ -4442,9 +4627,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// FIXME: it tries to fix a problem with MSVC buildbots.
TargetTransformInfo &TTIRef = *TTI;
auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
- VectorizedVals](InstructionCost &Cost,
- bool IsGather) {
+ VectorizedVals, E](InstructionCost &Cost) {
DenseMap<Value *, int> ExtractVectorsTys;
+ SmallPtrSet<Value *, 4> CheckedExtracts;
for (auto *V : VL) {
if (isa<UndefValue>(V))
continue;
@@ -4452,7 +4637,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// instruction itself is not going to be vectorized, consider this
// instruction as dead and remove its cost from the final cost of the
// vectorized tree.
- if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals))
+ // Also, avoid adjusting the cost for extractelements with multiple uses
+ // in different graph entries.
+ const TreeEntry *VE = getTreeEntry(V);
+ if (!CheckedExtracts.insert(V).second ||
+ !areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
+ (VE && VE != E))
continue;
auto *EE = cast<ExtractElementInst>(V);
Optional<unsigned> EEIdx = getExtractIndex(EE);
@@ -4549,11 +4739,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
}
return GatherCost;
}
- if (isSplat(VL)) {
- // Found the broadcasting of the single scalar, calculate the cost as the
- // broadcast.
- return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
- }
if ((E->getOpcode() == Instruction::ExtractElement ||
all_of(E->Scalars,
[](Value *V) {
@@ -4571,13 +4756,20 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// single input vector or of 2 input vectors.
InstructionCost Cost =
computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
- AdjustExtractsCost(Cost, /*IsGather=*/true);
+ AdjustExtractsCost(Cost);
if (NeedToShuffleReuses)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
FinalVecTy, E->ReuseShuffleIndices);
return Cost;
}
}
+ if (isSplat(VL)) {
+ // Found the broadcasting of the single scalar, calculate the cost as the
+ // broadcast.
+ assert(VecTy == FinalVecTy &&
+ "No reused scalars expected for broadcast.");
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
+ }
InstructionCost ReuseShuffleCost = 0;
if (NeedToShuffleReuses)
ReuseShuffleCost = TTI->getShuffleCost(
@@ -4755,7 +4947,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
}
} else {
- AdjustExtractsCost(CommonCost, /*IsGather=*/false);
+ AdjustExtractsCost(CommonCost);
}
return CommonCost;
}
@@ -5211,15 +5403,15 @@ static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
FoundOr = true;
}
// Check if the input is an extended load of the required or/shift expression.
- Value *LoadPtr;
+ Value *Load;
if ((MustMatchOrInst && !FoundOr) || ZextLoad == Root ||
- !match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr)))))
+ !match(ZextLoad, m_ZExt(m_Value(Load))) || !isa<LoadInst>(Load))
return false;
// Require that the total load bit width is a legal integer type.
// For example, <8 x i8> --> i64 is a legal integer on a 64-bit target.
// But <16 x i8> --> i128 is not, so the backend probably can't reduce it.
- Type *SrcTy = LoadPtr->getType()->getPointerElementType();
+ Type *SrcTy = Load->getType();
unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts;
if (!TTI->isTypeLegal(IntegerType::get(Root->getContext(), LoadBitWidth)))
return false;
@@ -9061,8 +9253,7 @@ private:
"A call to the llvm.fmuladd intrinsic is not handled yet");
++NumVectorInstructions;
- return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
- ReductionOps.back());
+ return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind);
}
};
@@ -9473,6 +9664,59 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
return Changed;
}
+/// Compare two cmp instructions. If IsCompatibility is true, function returns
+/// true if 2 cmps have same/swapped predicates and most compatible corresponding
+/// operands. If IsCompatibility is false, function implements strict weak
+/// ordering relation between two cmp instructions, returning true if the first
+/// instruction is "less" than the second, i.e. its predicate is less than the
+/// predicate of the second or the operands IDs are less than the operands IDs
+/// of the second cmp instruction.
+template <bool IsCompatibility>
+static bool compareCmp(Value *V, Value *V2,
+ function_ref<bool(Instruction *)> IsDeleted) {
+ auto *CI1 = cast<CmpInst>(V);
+ auto *CI2 = cast<CmpInst>(V2);
+ if (IsDeleted(CI2) || !isValidElementType(CI2->getType()))
+ return false;
+ if (CI1->getOperand(0)->getType()->getTypeID() <
+ CI2->getOperand(0)->getType()->getTypeID())
+ return !IsCompatibility;
+ if (CI1->getOperand(0)->getType()->getTypeID() >
+ CI2->getOperand(0)->getType()->getTypeID())
+ return false;
+ CmpInst::Predicate Pred1 = CI1->getPredicate();
+ CmpInst::Predicate Pred2 = CI2->getPredicate();
+ CmpInst::Predicate SwapPred1 = CmpInst::getSwappedPredicate(Pred1);
+ CmpInst::Predicate SwapPred2 = CmpInst::getSwappedPredicate(Pred2);
+ CmpInst::Predicate BasePred1 = std::min(Pred1, SwapPred1);
+ CmpInst::Predicate BasePred2 = std::min(Pred2, SwapPred2);
+ if (BasePred1 < BasePred2)
+ return !IsCompatibility;
+ if (BasePred1 > BasePred2)
+ return false;
+ // Compare operands.
+ bool LEPreds = Pred1 <= Pred2;
+ bool GEPreds = Pred1 >= Pred2;
+ for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) {
+ auto *Op1 = CI1->getOperand(LEPreds ? I : E - I - 1);
+ auto *Op2 = CI2->getOperand(GEPreds ? I : E - I - 1);
+ if (Op1->getValueID() < Op2->getValueID())
+ return !IsCompatibility;
+ if (Op1->getValueID() > Op2->getValueID())
+ return false;
+ if (auto *I1 = dyn_cast<Instruction>(Op1))
+ if (auto *I2 = dyn_cast<Instruction>(Op2)) {
+ if (I1->getParent() != I2->getParent())
+ return false;
+ InstructionsState S = getSameOpcode({I1, I2});
+ if (S.getOpcode())
+ continue;
+ return false;
+ }
+ }
+ return IsCompatibility;
+}
+
bool SLPVectorizerPass::vectorizeSimpleInstructions(
SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
bool AtTerminator) {
@@ -9504,37 +9748,16 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
}
// Try to vectorize list of compares.
// Sort by type, compare predicate, etc.
- // TODO: Add analysis on the operand opcodes (profitable to vectorize
- // instructions with same/alternate opcodes/const values).
auto &&CompareSorter = [&R](Value *V, Value *V2) {
- auto *CI1 = cast<CmpInst>(V);
- auto *CI2 = cast<CmpInst>(V2);
- if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
- return false;
- if (CI1->getOperand(0)->getType()->getTypeID() <
- CI2->getOperand(0)->getType()->getTypeID())
- return true;
- if (CI1->getOperand(0)->getType()->getTypeID() >
- CI2->getOperand(0)->getType()->getTypeID())
- return false;
- return CI1->getPredicate() < CI2->getPredicate() ||
- (CI1->getPredicate() > CI2->getPredicate() &&
- CI1->getPredicate() <
- CmpInst::getSwappedPredicate(CI2->getPredicate()));
+ return compareCmp<false>(V, V2,
+ [&R](Instruction *I) { return R.isDeleted(I); });
};
auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
if (V1 == V2)
return true;
- auto *CI1 = cast<CmpInst>(V1);
- auto *CI2 = cast<CmpInst>(V2);
- if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
- return false;
- if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType())
- return false;
- return CI1->getPredicate() == CI2->getPredicate() ||
- CI1->getPredicate() ==
- CmpInst::getSwappedPredicate(CI2->getPredicate());
+ return compareCmp<true>(V1, V2,
+ [&R](Instruction *I) { return R.isDeleted(I); });
};
auto Limit = [&R](Value *V) {
unsigned EltSize = R.getVectorElementSize(V);
@@ -9592,10 +9815,15 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return true;
if (Opcodes1.size() > Opcodes2.size())
return false;
+ Optional<bool> ConstOrder;
for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
// Undefs are compatible with any other value.
- if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
+ if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I])) {
+ if (!ConstOrder)
+ ConstOrder =
+ !isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]);
continue;
+ }
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
@@ -9614,14 +9842,17 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
return I1->getOpcode() < I2->getOpcode();
}
- if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
+ if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I])) {
+ if (!ConstOrder)
+ ConstOrder = Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID();
continue;
+ }
if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
return true;
if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
return false;
}
- return false;
+ return ConstOrder && *ConstOrder;
};
auto AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
if (V1 == V2)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 44b5e1df0839..1d9e71663cd2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -374,8 +374,7 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
assert((SplitAt == end() || SplitAt->getParent() == this) &&
"can only split at a position in the same block");
- SmallVector<VPBlockBase *, 2> Succs(getSuccessors().begin(),
- getSuccessors().end());
+ SmallVector<VPBlockBase *, 2> Succs(successors());
// First, disconnect the current block from its successors.
for (VPBlockBase *Succ : Succs)
VPBlockUtils::disconnectBlocks(this, Succ);
@@ -642,6 +641,7 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
void VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
IRBuilder<> &Builder = State.Builder;
+ Builder.SetCurrentDebugLocation(DL);
if (Instruction::isBinaryOp(getOpcode())) {
Value *A = State.get(getOperand(0), Part);
@@ -768,6 +768,11 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
O << " ";
Operand->printAsOperand(O, SlotTracker);
}
+
+ if (DL) {
+ O << ", !dbg ";
+ DL.print(O);
+ }
}
#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 810dd5030f95..f4a1883e35d5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -39,6 +39,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/InstructionCost.h"
#include <algorithm>
@@ -51,6 +52,7 @@ namespace llvm {
class BasicBlock;
class DominatorTree;
+class InductionDescriptor;
class InnerLoopVectorizer;
class LoopInfo;
class raw_ostream;
@@ -500,6 +502,8 @@ public:
const VPBlocksTy &getSuccessors() const { return Successors; }
VPBlocksTy &getSuccessors() { return Successors; }
+ iterator_range<VPBlockBase **> successors() { return Successors; }
+
const VPBlocksTy &getPredecessors() const { return Predecessors; }
VPBlocksTy &getPredecessors() { return Predecessors; }
@@ -795,6 +799,7 @@ private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;
FastMathFlags FMF;
+ DebugLoc DL;
/// Utility method serving execute(): generates a single instance of the
/// modeled instruction.
@@ -804,12 +809,14 @@ protected:
void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
public:
- VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
+ VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL)
: VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
- VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {}
+ VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode),
+ DL(DL) {}
- VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
- : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}
+ VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
+ DebugLoc DL = {})
+ : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL) {}
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPValue *V) {
@@ -818,7 +825,7 @@ public:
VPInstruction *clone() const {
SmallVector<VPValue *, 2> Operands(operands());
- return new VPInstruction(Opcode, Operands);
+ return new VPInstruction(Opcode, Operands, DL);
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1003,21 +1010,22 @@ public:
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
-class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
+class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
PHINode *IV;
+ const InductionDescriptor &IndDesc;
public:
- VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, Instruction *Cast,
- TruncInst *Trunc = nullptr)
- : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), IV(IV) {
- if (Trunc)
- new VPValue(Trunc, this);
- else
- new VPValue(IV, this);
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
+ const InductionDescriptor &IndDesc)
+ : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this),
+ IV(IV), IndDesc(IndDesc) {}
+
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
+ const InductionDescriptor &IndDesc,
+ TruncInst *Trunc)
+ : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this),
+ IV(IV), IndDesc(IndDesc) {}
- if (Cast)
- new VPValue(Cast, this);
- }
~VPWidenIntOrFpInductionRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1038,13 +1046,6 @@ public:
/// Returns the start value of the induction.
VPValue *getStartValue() { return getOperand(0); }
- /// Returns the cast VPValue, if one is attached, or nullptr otherwise.
- VPValue *getCastValue() {
- if (getNumDefinedValues() != 2)
- return nullptr;
- return getVPValue(1);
- }
-
/// Returns the first defined value as TruncInst, if it is one or nullptr
/// otherwise.
TruncInst *getTruncInst() {
@@ -1053,6 +1054,9 @@ public:
const TruncInst *getTruncInst() const {
return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
}
+
+ /// Returns the induction descriptor for the recipe.
+ const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
};
/// A recipe for handling first order recurrences and pointer inductions. For
@@ -1169,7 +1173,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe {
/// operand.
class VPReductionPHIRecipe : public VPWidenPHIRecipe {
/// Descriptor for the reduction.
- RecurrenceDescriptor &RdxDesc;
+ const RecurrenceDescriptor &RdxDesc;
/// The phi is part of an in-loop reduction.
bool IsInLoop;
@@ -1180,7 +1184,7 @@ class VPReductionPHIRecipe : public VPWidenPHIRecipe {
public:
/// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
/// RdxDesc.
- VPReductionPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc,
+ VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
VPValue &Start, bool IsInLoop = false,
bool IsOrdered = false)
: VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start),
@@ -1210,7 +1214,9 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- RecurrenceDescriptor &getRecurrenceDescriptor() { return RdxDesc; }
+ const RecurrenceDescriptor &getRecurrenceDescriptor() const {
+ return RdxDesc;
+ }
/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
@@ -1340,13 +1346,13 @@ public:
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPValue {
/// The recurrence decriptor for the reduction in question.
- RecurrenceDescriptor *RdxDesc;
+ const RecurrenceDescriptor *RdxDesc;
/// Pointer to the TTI, needed to create the target reduction
const TargetTransformInfo *TTI;
public:
- VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
- VPValue *VecOp, VPValue *CondOp,
+ VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I,
+ VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
const TargetTransformInfo *TTI)
: VPRecipeBase(VPRecipeBase::VPReductionSC, {ChainOp, VecOp}),
VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), TTI(TTI) {
@@ -2252,6 +2258,12 @@ public:
return map_range(Operands, Fn);
}
+ /// Returns true if \p VPV is uniform after vectorization.
+ bool isUniformAfterVectorization(VPValue *VPV) const {
+ auto RepR = dyn_cast_or_null<VPReplicateRecipe>(VPV->getDef());
+ return !VPV->getDef() || (RepR && RepR->isUniform());
+ }
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
@@ -2340,18 +2352,23 @@ public:
/// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
/// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
- /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
- /// has more than one successor, its conditional bit is propagated to \p
- /// NewBlock. \p NewBlock must have neither successors nor predecessors.
+ /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
+ /// successors are moved from \p BlockPtr to \p NewBlock and \p BlockPtr's
+ /// conditional bit is propagated to \p NewBlock. \p NewBlock must have
+ /// neither successors nor predecessors.
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
assert(NewBlock->getSuccessors().empty() &&
- "Can't insert new block with successors.");
- // TODO: move successors from BlockPtr to NewBlock when this functionality
- // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
- // already has successors.
- BlockPtr->setOneSuccessor(NewBlock);
- NewBlock->setPredecessors({BlockPtr});
+ NewBlock->getPredecessors().empty() &&
+ "Can't insert new block with predecessors or successors.");
NewBlock->setParent(BlockPtr->getParent());
+ SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
+ for (VPBlockBase *Succ : Succs) {
+ disconnectBlocks(BlockPtr, Succ);
+ connectBlocks(NewBlock, Succ);
+ }
+ NewBlock->setCondBit(BlockPtr->getCondBit());
+ BlockPtr->setCondBit(nullptr);
+ connectBlocks(BlockPtr, NewBlock);
}
/// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
@@ -2394,6 +2411,31 @@ public:
To->removePredecessor(From);
}
+ /// Try to merge \p Block into its single predecessor, if \p Block is a
+ /// VPBasicBlock and its predecessor has a single successor. Returns a pointer
+ /// to the predecessor \p Block was merged into or nullptr otherwise.
+ static VPBasicBlock *tryToMergeBlockIntoPredecessor(VPBlockBase *Block) {
+ auto *VPBB = dyn_cast<VPBasicBlock>(Block);
+ auto *PredVPBB =
+ dyn_cast_or_null<VPBasicBlock>(Block->getSinglePredecessor());
+ if (!VPBB || !PredVPBB || PredVPBB->getNumSuccessors() != 1)
+ return nullptr;
+
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ R.moveBefore(*PredVPBB, PredVPBB->end());
+ VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
+ auto *ParentRegion = cast<VPRegionBlock>(Block->getParent());
+ if (ParentRegion->getExit() == Block)
+ ParentRegion->setExit(PredVPBB);
+ SmallVector<VPBlockBase *> Successors(Block->successors());
+ for (auto *Succ : Successors) {
+ VPBlockUtils::disconnectBlocks(Block, Succ);
+ VPBlockUtils::connectBlocks(PredVPBB, Succ);
+ }
+ delete Block;
+ return PredVPBB;
+ }
+
/// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
static bool isBackEdge(const VPBlockBase *FromBlock,
const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index ac3b3505dc34..86ecd6817873 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -50,14 +50,14 @@ VPValue *VPlanPredicator::getOrCreateNotPredicate(VPBasicBlock *PredBB,
case EdgeType::FALSE_EDGE:
// CurrBB is the False successor of PredBB - compute not of CBV.
- IntermediateVal = Builder.createNot(CBV);
+ IntermediateVal = Builder.createNot(CBV, {});
break;
}
// Now AND intermediate value with PredBB's block predicate if it has one.
VPValue *BP = PredBB->getPredicate();
if (BP)
- return Builder.createAnd(BP, IntermediateVal);
+ return Builder.createAnd(BP, IntermediateVal, {});
else
return IntermediateVal;
}
@@ -96,7 +96,7 @@ VPValue *VPlanPredicator::genPredicateTree(std::list<VPValue *> &Worklist) {
Worklist.pop_front();
// Create an OR of these values.
- VPValue *Or = Builder.createOr(LHS, RHS);
+ VPValue *Or = Builder.createOr(LHS, RHS, {});
// Push OR to the back of the worklist.
Worklist.push_back(Or);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
index c52c8a2229e8..9e19e172dea5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -467,8 +467,9 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
return markFailed();
assert(CombinedOperands.size() > 0 && "Need more some operands");
- auto *VPI = new VPInstruction(Opcode, CombinedOperands);
- VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
+ auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
+ auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
+ VPI->setUnderlyingInstr(Inst);
LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
<< *cast<VPInstruction>(Values[0]) << "\n");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ded5bc04beb5..d2daf558c2c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -18,7 +18,8 @@ using namespace llvm;
void VPlanTransforms::VPInstructionsToVPRecipes(
Loop *OrigLoop, VPlanPtr &Plan,
- LoopVectorizationLegality::InductionList &Inductions,
+ function_ref<const InductionDescriptor *(PHINode *)>
+ GetIntOrFpInductionDescriptor,
SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE) {
auto *TopRegion = cast<VPRegionBlock>(Plan->getEntry());
@@ -44,11 +45,9 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
VPRecipeBase *NewRecipe = nullptr;
if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
- InductionDescriptor II = Inductions.lookup(Phi);
- if (II.getKind() == InductionDescriptor::IK_IntInduction ||
- II.getKind() == InductionDescriptor::IK_FpInduction) {
- VPValue *Start = Plan->getOrAddVPValue(II.getStartValue());
- NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, nullptr);
+ if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
+ VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
+ NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
@@ -158,8 +157,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
// TODO: add ".cloned" suffix to name of Clone's VPValue.
Clone->insertBefore(SinkCandidate);
- SmallVector<VPUser *, 4> Users(SinkCandidate->user_begin(),
- SinkCandidate->user_end());
+ SmallVector<VPUser *, 4> Users(SinkCandidate->users());
for (auto *U : Users) {
auto *UI = cast<VPRecipeBase>(U);
if (UI->getParent() == SinkTo)
@@ -266,8 +264,7 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
VPValue *PredInst1 =
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
- SmallVector<VPUser *> Users(Phi1ToMoveV->user_begin(),
- Phi1ToMoveV->user_end());
+ SmallVector<VPUser *> Users(Phi1ToMoveV->users());
for (VPUser *U : Users) {
auto *UI = dyn_cast<VPRecipeBase>(U);
if (!UI || UI->getParent() != Then2)
@@ -295,3 +292,35 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
delete ToDelete;
return Changed;
}
+
+void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
+ SmallVector<std::pair<VPRecipeBase *, VPValue *>> CastsToRemove;
+ for (auto &Phi : Plan.getEntry()->getEntryBasicBlock()->phis()) {
+ auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+ if (!IV || IV->getTruncInst())
+ continue;
+
+ // Walk the chain of casts recorded for IV (in Casts), locating the
+ // recipe corresponding to each cast and remembering it for removal.
+ auto &Casts = IV->getInductionDescriptor().getCastInsts();
+ VPValue *FindMyCast = IV;
+ for (Instruction *IRCast : reverse(Casts)) {
+ VPRecipeBase *FoundUserCast = nullptr;
+ for (auto *U : FindMyCast->users()) {
+ auto *UserCast = cast<VPRecipeBase>(U);
+ if (UserCast->getNumDefinedValues() == 1 &&
+ UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) {
+ FoundUserCast = UserCast;
+ break;
+ }
+ }
+ assert(FoundUserCast && "Missing a cast to remove");
+ CastsToRemove.emplace_back(FoundUserCast, IV);
+ FindMyCast = FoundUserCast->getVPSingleValue();
+ }
+ }
+ for (auto &E : CastsToRemove) {
+ E.first->getVPSingleValue()->replaceAllUsesWith(E.second);
+ E.first->eraseFromParent();
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index c740f2c022da..a82a562d5e35 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -14,24 +14,37 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
#include "VPlan.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
namespace llvm {
+class InductionDescriptor;
class Instruction;
+class PHINode;
class ScalarEvolution;
struct VPlanTransforms {
/// Replaces the VPInstructions in \p Plan with corresponding
/// widen recipes.
- static void VPInstructionsToVPRecipes(
- Loop *OrigLoop, VPlanPtr &Plan,
- LoopVectorizationLegality::InductionList &Inductions,
- SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE);
+ static void
+ VPInstructionsToVPRecipes(Loop *OrigLoop, VPlanPtr &Plan,
+ function_ref<const InductionDescriptor *(PHINode *)>
+ GetIntOrFpInductionDescriptor,
+ SmallPtrSetImpl<Instruction *> &DeadInstructions,
+ ScalarEvolution &SE);
static bool sinkScalarOperands(VPlan &Plan);
static bool mergeReplicateRegions(VPlan &Plan);
+
+ /// Remove redundant casts of inductions.
+ ///
+ /// Such redundant casts are casts of induction variables that can be ignored,
+ /// because we already proved that the casted phi is equal to the uncasted phi
+ /// in the vectorized loop. There is no need to vectorize the cast - the same
+ /// value can be used for both the phi and casts in the vector loop.
+ static void removeRedundantInductionCasts(VPlan &Plan);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 6d6ea4eb30f1..7732d9367985 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -156,5 +156,31 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
RecipeI++;
}
}
+
+ const VPRegionBlock *TopRegion = cast<VPRegionBlock>(Plan.getEntry());
+ const VPBasicBlock *Entry = dyn_cast<VPBasicBlock>(TopRegion->getEntry());
+ if (!Entry) {
+ errs() << "VPlan entry block is not a VPBasicBlock\n";
+ return false;
+ }
+ const VPBasicBlock *Exit = dyn_cast<VPBasicBlock>(TopRegion->getExit());
+ if (!Exit) {
+ errs() << "VPlan exit block is not a VPBasicBlock\n";
+ return false;
+ }
+
+ for (const VPRegionBlock *Region :
+ VPBlockUtils::blocksOnly<const VPRegionBlock>(
+ depth_first(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
+ Plan.getEntry())))) {
+ if (Region->getEntry()->getNumPredecessors() != 0) {
+ errs() << "region entry block has predecessors\n";
+ return false;
+ }
+ if (Region->getExit()->getNumSuccessors() != 0) {
+ errs() << "region exit block has successors\n";
+ return false;
+ }
+ }
return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 57b11e9414ba..c0aedab2fed0 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -989,9 +989,9 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
if (!FixedVT)
return false;
- InstructionCost OriginalCost = TTI.getMemoryOpCost(
- Instruction::Load, LI->getType(), Align(LI->getAlignment()),
- LI->getPointerAddressSpace());
+ InstructionCost OriginalCost =
+ TTI.getMemoryOpCost(Instruction::Load, LI->getType(), LI->getAlign(),
+ LI->getPointerAddressSpace());
InstructionCost ScalarizedCost = 0;
Instruction *LastCheckedInst = LI;