author    Dimitry Andric <dim@FreeBSD.org>  2022-07-14 18:50:02 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2022-07-14 18:50:02 +0000
commit    1f917f69ff07f09b6dbb670971f57f8efe718b84 (patch)
tree      99293cbc1411737cd995dac10a99b2c40ef0944c /llvm/lib
parent    145449b1e420787bb99721a429341fa6be3adfb6 (diff)
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/BranchProbabilityInfo.cpp | 11
-rw-r--r-- llvm/lib/Analysis/ConstantFolding.cpp | 15
-rw-r--r-- llvm/lib/Analysis/GlobalsModRef.cpp | 16
-rw-r--r-- llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 4
-rw-r--r-- llvm/lib/Analysis/InlineAdvisor.cpp | 13
-rw-r--r-- llvm/lib/Analysis/InlineCost.cpp | 17
-rw-r--r-- llvm/lib/Analysis/InstructionSimplify.cpp | 45
-rw-r--r-- llvm/lib/Analysis/LazyValueInfo.cpp | 6
-rw-r--r-- llvm/lib/Analysis/Loads.cpp | 5
-rw-r--r-- llvm/lib/Analysis/MemoryBuiltins.cpp | 4
-rw-r--r-- llvm/lib/Analysis/MustExecute.cpp | 2
-rw-r--r-- llvm/lib/Analysis/ProfileSummaryInfo.cpp | 12
-rw-r--r-- llvm/lib/Analysis/ScalarEvolution.cpp | 44
-rw-r--r-- llvm/lib/Analysis/TFUtils.cpp | 11
-rw-r--r-- llvm/lib/Analysis/TargetTransformInfo.cpp | 2
-rw-r--r-- llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 34
-rw-r--r-- llvm/lib/Analysis/ValueTracking.cpp | 24
-rw-r--r-- llvm/lib/Analysis/VectorUtils.cpp | 2
-rw-r--r-- llvm/lib/AsmParser/LLLexer.cpp | 3
-rw-r--r-- llvm/lib/AsmParser/LLParser.cpp | 84
-rw-r--r-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 15
-rw-r--r-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 9
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 125
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def | 2
-rw-r--r-- llvm/lib/CodeGen/AtomicExpandPass.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/ExpandVectorPredication.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 50
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 15
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 31
-rw-r--r-- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 9
-rw-r--r-- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 46
-rw-r--r-- llvm/lib/CodeGen/LiveIntervals.cpp | 17
-rw-r--r-- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/MachineBasicBlock.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/MachineScheduler.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/MachineVerifier.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ModuloSchedule.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 3
-rw-r--r-- llvm/lib/CodeGen/SelectOptimize.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 126
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 17
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 93
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 38
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 166
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 99
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 160
-rw-r--r-- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 6
-rw-r--r-- llvm/lib/DWARFLinker/DWARFLinker.cpp | 1
-rw-r--r-- llvm/lib/DebugInfo/CodeView/CodeViewError.cpp | 5
-rw-r--r-- llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 4
-rw-r--r-- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 10
-rw-r--r-- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 29
-rw-r--r-- llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 8
-rw-r--r-- llvm/lib/DebugInfo/MSF/MSFError.cpp | 7
-rw-r--r-- llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp | 7
-rw-r--r-- llvm/lib/DebugInfo/PDB/GenericError.cpp | 7
-rw-r--r-- llvm/lib/DebugInfo/PDB/Native/RawError.cpp | 7
-rw-r--r-- llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 4
-rw-r--r-- llvm/lib/Debuginfod/Debuginfod.cpp | 315
-rw-r--r-- llvm/lib/Debuginfod/HTTPServer.cpp | 189
-rw-r--r-- llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp | 30
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/COFF.cpp | 137
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp | 527
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h | 199
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp | 216
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp | 5
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp | 161
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 30
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 11
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 2
-rw-r--r-- llvm/lib/ExecutionEngine/JITLink/aarch64.cpp | 8
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp | 6
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 5
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp | 52
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp | 4
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp | 10
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp | 8
-rw-r--r-- llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp | 10
-rw-r--r-- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 6
-rw-r--r-- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 2
-rw-r--r-- llvm/lib/Frontend/OpenMP/OMPContext.cpp | 4
-rw-r--r-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 115
-rw-r--r-- llvm/lib/IR/AsmWriter.cpp | 14
-rw-r--r-- llvm/lib/IR/ConstantFold.cpp | 27
-rw-r--r-- llvm/lib/IR/Constants.cpp | 183
-rw-r--r-- llvm/lib/IR/ConstantsContext.h | 60
-rw-r--r-- llvm/lib/IR/Core.cpp | 86
-rw-r--r-- llvm/lib/IR/InlineAsm.cpp | 43
-rw-r--r-- llvm/lib/IR/Instructions.cpp | 17
-rw-r--r-- llvm/lib/IR/IntrinsicInst.cpp | 10
-rw-r--r-- llvm/lib/IR/LLVMContextImpl.cpp | 5
-rw-r--r-- llvm/lib/IR/Metadata.cpp | 7
-rw-r--r-- llvm/lib/IR/Module.cpp | 12
-rw-r--r-- llvm/lib/IR/OptBisect.cpp | 7
-rw-r--r-- llvm/lib/IR/PassRegistry.cpp | 10
-rw-r--r-- llvm/lib/InterfaceStub/IFSHandler.cpp | 21
-rw-r--r-- llvm/lib/LTO/LTOBackend.cpp | 52
-rw-r--r-- llvm/lib/Linker/IRMover.cpp | 28
-rw-r--r-- llvm/lib/MC/ELFObjectWriter.cpp | 14
-rw-r--r-- llvm/lib/MC/MCContext.cpp | 4
-rw-r--r-- llvm/lib/MC/MCDisassembler/MCDisassembler.cpp | 4
-rw-r--r-- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 2
-rw-r--r-- llvm/lib/MC/MCParser/MasmParser.cpp | 92
-rw-r--r-- llvm/lib/MC/MCSchedule.cpp | 8
-rw-r--r-- llvm/lib/MC/MCSectionELF.cpp | 2
-rw-r--r-- llvm/lib/MC/MCSectionXCOFF.cpp | 4
-rw-r--r-- llvm/lib/ObjCopy/ConfigManager.cpp | 28
-rw-r--r-- llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp | 125
-rw-r--r-- llvm/lib/ObjCopy/ELF/ELFObject.cpp | 24
-rw-r--r-- llvm/lib/ObjCopy/ELF/ELFObject.h | 2
-rw-r--r-- llvm/lib/Object/Decompressor.cpp | 7
-rw-r--r-- llvm/lib/Object/ELF.cpp | 1
-rw-r--r-- llvm/lib/Object/ELFObjectFile.cpp | 18
-rw-r--r-- llvm/lib/Object/Error.cpp | 6
-rw-r--r-- llvm/lib/ObjectYAML/DXContainerEmitter.cpp | 6
-rw-r--r-- llvm/lib/ObjectYAML/ELFYAML.cpp | 1
-rw-r--r-- llvm/lib/Passes/PassBuilderPipelines.cpp | 3
-rw-r--r-- llvm/lib/Passes/StandardInstrumentations.cpp | 5
-rw-r--r-- llvm/lib/ProfileData/Coverage/CoverageMapping.cpp | 6
-rw-r--r-- llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp | 12
-rw-r--r-- llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp | 12
-rw-r--r-- llvm/lib/ProfileData/InstrProf.cpp | 32
-rw-r--r-- llvm/lib/ProfileData/SampleProf.cpp | 6
-rw-r--r-- llvm/lib/ProfileData/SampleProfReader.cpp | 10
-rw-r--r-- llvm/lib/ProfileData/SampleProfWriter.cpp | 11
-rw-r--r-- llvm/lib/Support/CommandLine.cpp | 6
-rw-r--r-- llvm/lib/Support/Compression.cpp | 52
-rw-r--r-- llvm/lib/Support/ConvertUTF.cpp | 10
-rw-r--r-- llvm/lib/Support/Error.cpp | 12
-rw-r--r-- llvm/lib/Support/Process.cpp | 2
-rw-r--r-- llvm/lib/Support/Unicode.cpp | 2
-rw-r--r-- llvm/lib/Support/Unix/Process.inc | 8
-rw-r--r-- llvm/lib/Support/VirtualFileSystem.cpp | 6
-rw-r--r-- llvm/lib/Support/Windows/Signals.inc | 5
-rw-r--r-- llvm/lib/Support/X86TargetParser.cpp | 6
-rw-r--r-- llvm/lib/Support/raw_ostream.cpp | 2
-rw-r--r-- llvm/lib/TableGen/Record.cpp | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64.td | 11
-rw-r--r-- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 239
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrAtomics.td | 38
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 73
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedA53.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedA55.td | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedA57.td | 7
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 4
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedAmpere1.td | 4
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedCyclone.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedExynosM3.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedExynosM4.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedExynosM5.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedFalkor.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedKryo.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td | 2279
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedTSV110.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedThunderX.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td | 3
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 47
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 6
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp | 29
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h | 4
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 57
-rw-r--r-- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 35
-rw-r--r-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 14
-rw-r--r-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/AArch64/SMEInstrFormats.td | 34
-rw-r--r-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 10
-rw-r--r-- llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 53
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 8
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 47
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 76
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 11
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 13
-rw-r--r-- llvm/lib/Target/AMDGPU/BUFInstructions.td | 27
-rw-r--r-- llvm/lib/Target/AMDGPU/DSInstructions.td | 77
-rw-r--r-- llvm/lib/Target/AMDGPU/FLATInstructions.td | 27
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 175
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 127
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp | 212
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNVOPDUtils.h | 32
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 13
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 8
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/R600MCInstLower.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 19
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 121
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td | 37
-rw-r--r-- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 8
-rw-r--r-- llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 522
-rw-r--r-- llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 15
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 34
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 14
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/VOPInstructions.td | 6
-rw-r--r-- llvm/lib/Target/ARC/ARCAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/ARM/ARM.td | 11
-rw-r--r-- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 4
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 14
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 6
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 8
-rw-r--r-- llvm/lib/Target/AVR/AVRAsmPrinter.cpp | 4
-rw-r--r-- llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp | 79
-rw-r--r-- llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/BPF/BTF.h | 2
-rw-r--r-- llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp | 15
-rw-r--r-- llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/CSKY/CSKYInstrInfo.td | 2
-rw-r--r-- llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td | 2
-rw-r--r-- llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/DirectX/DXIL.td | 8
-rw-r--r-- llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 4
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 3
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 5
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 4
-rw-r--r-- llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h | 4
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td | 52
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td | 54
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp | 172
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFrameLowering.h | 18
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp | 56
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h | 5
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 569
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.h | 26
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp | 70
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchInstrInfo.h | 10
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 218
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp | 33
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp | 24
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 7
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/M68k/M68kAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 4
-rw-r--r-- llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTX.h | 1
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 4
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 2
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 54
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 10
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h | 6
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 48
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 9
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.h | 1
-rw-r--r-- llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 20
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 10
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 17
-rw-r--r-- llvm/lib/Target/RISCV/RISCVFrameLowering.h | 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 634
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 13
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 105
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 6
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 85
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 36
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 16
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 28
-rw-r--r-- llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp | 10
-rw-r--r-- llvm/lib/Target/RISCV/RISCVScheduleV.td | 28
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 4
-rw-r--r-- llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp | 10
-rw-r--r-- llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 6
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp | 95
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h | 174
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 52
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 41
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 34
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 101
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 6
-rw-r--r-- llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 15
-rw-r--r-- llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 17
-rw-r--r-- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZCallingConv.td | 6
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 113
-rw-r--r-- llvm/lib/Target/TargetLoweringObjectFile.cpp | 7
-rw-r--r-- llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp | 15
-rw-r--r-- llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/VE/VEAsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/VE/VEInstrInfo.td | 8
-rw-r--r-- llvm/lib/Target/VE/VERegisterInfo.cpp | 203
-rw-r--r-- llvm/lib/Target/VE/VVPISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp | 7
-rw-r--r-- llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp | 1
-rw-r--r-- llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp | 33
-rw-r--r-- llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h | 8
-rw-r--r-- llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp | 22
-rw-r--r-- llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h | 4
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp | 22
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp | 18
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp | 25
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp | 27
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp | 17
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp | 2
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp | 8
-rw-r--r-- llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/X86/X86.h | 4
-rw-r--r-- llvm/lib/Target/X86/X86.td | 3
-rw-r--r-- llvm/lib/Target/X86/X86EvexToVex.cpp | 1
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 193
-rw-r--r-- llvm/lib/Target/X86/X86InstrCompiler.td | 38
-rw-r--r-- llvm/lib/Target/X86/X86InstrFMA3Info.cpp | 2
-rw-r--r-- llvm/lib/Target/X86/X86InstrFoldTables.cpp | 8
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.td | 1
-rw-r--r-- llvm/lib/Target/X86/X86InstrSystem.td | 9
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 3
-rw-r--r-- llvm/lib/Target/X86/X86MCInstLower.cpp | 4
-rw-r--r-- llvm/lib/Target/X86/X86PartialReduction.cpp | 6
-rw-r--r-- llvm/lib/Target/X86/X86ReturnThunks.cpp | 92
-rw-r--r-- llvm/lib/Target/X86/X86TargetMachine.cpp | 2
-rw-r--r-- llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 1
-rw-r--r-- llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h | 1
-rw-r--r-- llvm/lib/Target/XCore/XCoreAsmPrinter.cpp | 3
-rw-r--r-- llvm/lib/ToolDrivers/llvm-lib/Options.td | 2
-rw-r--r-- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 49
-rw-r--r-- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 2
-rw-r--r-- llvm/lib/Transforms/IPO/Attributor.cpp | 12
-rw-r--r-- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 173
-rw-r--r-- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 6
-rw-r--r-- llvm/lib/Transforms/IPO/IROutliner.cpp | 34
-rw-r--r-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 9
-rw-r--r-- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 177
-rw-r--r-- llvm/lib/Transforms/IPO/SampleContextTracker.cpp | 2
-rw-r--r-- llvm/lib/Transforms/IPO/SampleProfile.cpp | 4
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 105
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 13
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp | 8
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 7
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 9
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 13
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 42
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 4
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 16
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 22
-rw-r--r-- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Instrumentation/CGProfile.cpp | 3
-rw-r--r-- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 139
-rw-r--r-- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 1
-rw-r--r-- llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Scalar/GVN.cpp | 25
-rw-r--r-- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 60
-rw-r--r-- llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 16
-rw-r--r-- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 17
-rw-r--r-- llvm/lib/Transforms/Scalar/LICM.cpp | 3
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 29
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 8
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 14
-rw-r--r-- llvm/lib/Transforms/Scalar/Reassociate.cpp | 19
-rw-r--r-- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 7
-rw-r--r-- llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 8
-rw-r--r-- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 3
-rw-r--r-- llvm/lib/Transforms/Utils/Debugify.cpp | 47
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUtils.cpp | 65
-rw-r--r-- llvm/lib/Transforms/Utils/LowerAtomic.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Utils/MisExpect.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Utils/ModuleUtils.cpp | 10
-rw-r--r-- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 30
-rw-r--r-- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 43
-rw-r--r-- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 3
-rw-r--r-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 205
-rw-r--r-- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 74
-rw-r--r-- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 189
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 30
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 51
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 733
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 18
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 3
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 71
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h | 62
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 365
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanValue.h | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 69
-rw-r--r-- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 174
455 files changed, 12472 insertions, 4478 deletions
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 1d880424e55c..428ae8975c30 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -629,9 +630,10 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
continue;
// First collapse InstChain
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (Instruction *I : llvm::reverse(InstChain)) {
- CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst,
- cast<Constant>(I->getOperand(1)), true);
+ CmpLHSConst = ConstantFoldBinaryOpOperands(
+ I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
if (!CmpLHSConst)
break;
}
@@ -826,9 +828,8 @@ void BranchProbabilityInfo::computeEestimateBlockWeight(
if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
// If we were able to find estimated weight for the block set it to this
// block and propagate up the IR.
- propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT,
- BBWeight.getValue(), BlockWorkList,
- LoopWorkList);
+ propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, BBWeight.value(),
+ BlockWorkList, LoopWorkList);
// BlockWorklist/LoopWorkList contains blocks/loops with at least one
// successor/exit having estimated weight. Try to propagate weight to such
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index a81041845052..aa4da27be4e5 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -30,6 +30,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -1142,8 +1143,12 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
Ops.push_back(NewC);
}
- if (auto *CE = dyn_cast<ConstantExpr>(C))
- return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (Constant *Res =
+ ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI))
+ return Res;
+ return const_cast<Constant *>(C);
+ }
assert(isa<ConstantVector>(C));
return ConstantVector::get(Ops);
@@ -1339,7 +1344,9 @@ Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
return C;
- return ConstantExpr::get(Opcode, LHS, RHS);
+ if (ConstantExpr::isDesirableBinOp(Opcode))
+ return ConstantExpr::get(Opcode, LHS, RHS);
+ return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
}
Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
@@ -1390,6 +1397,8 @@ Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
// Calculate constant result.
Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
+ if (!C)
+ return nullptr;
// Flush denormal output if needed.
return FlushFPConstant(C, I, /* IsOutput */ true);
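Note: after this change ConstantFoldBinaryOpOperands() may return null when the operands do not fold and the opcode is no longer a desirable constant expression, so callers need a null check (the BranchProbabilityInfo and ConstantFoldFPInstOperands hunks show this). A minimal sketch of the new caller contract, using a hypothetical helper that is not part of this patch:

    // Sketch only: callers must tolerate a null result instead of
    // assuming a ConstantExpr is always produced.
    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    static Constant *tryFoldBinOp(const Instruction *I, Constant *L,
                                  Constant *R, const DataLayout &DL) {
      // Null means "keep the original instruction"; it is not an error.
      return ConstantFoldBinaryOpOperands(I->getOpcode(), L, R, DL);
    }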
diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp
index e82d2fae9356..db6eae0d962a 100644
--- a/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -256,22 +256,6 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
-FunctionModRefBehavior
-GlobalsAAResult::getModRefBehavior(const CallBase *Call) {
- FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
-
- if (!Call->hasOperandBundles())
- if (const Function *F = Call->getCalledFunction())
- if (FunctionInfo *FI = getFunctionInfo(F)) {
- if (!isModOrRefSet(FI->getModRefInfo()))
- Min = FMRB_DoesNotAccessMemory;
- else if (!isModSet(FI->getModRefInfo()))
- Min = FMRB_OnlyReadsMemory;
- }
-
- return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
-}
-
/// Returns the function info for the function, or null if we don't have
/// anything useful to say about it.
GlobalsAAResult::FunctionInfo *
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 3d51042f4da8..a681c528e690 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -184,8 +184,8 @@ CmpInst::Predicate IRInstructionData::getPredicate() const {
"Can only get a predicate from a compare instruction");
if (RevisedPredicate)
- return RevisedPredicate.getValue();
-
+ return RevisedPredicate.value();
+
return cast<CmpInst>(Inst)->getPredicate();
}
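The getValue() to value() rename here recurs throughout the patch: llvm::Optional was being converged with the std::optional interface. A small illustrative sketch (not from this patch) of the accessor pairs involved:

    #include "llvm/ADT/Optional.h"

    int demo(llvm::Optional<int> MaybeN) {
      if (MaybeN.has_value())      // replaces hasValue()
        return MaybeN.value();     // replaces getValue()
      return MaybeN.value_or(0);   // replaces getValueOr(0)
    }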
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index cf8592c41eda..3fafc3057a13 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -56,10 +56,10 @@ static cl::opt<int>
cl::desc("Scale to limit the cost of inline deferral"),
cl::init(2), cl::Hidden);
-static cl::opt<bool> AnnotateInlinePhase(
- "annotate-inline-phase", cl::Hidden, cl::init(false),
- cl::desc("If true, annotate inline advisor remarks "
- "with LTO and pass information."));
+static cl::opt<bool>
+ AnnotateInlinePhase("annotate-inline-phase", cl::Hidden, cl::init(false),
+ cl::desc("If true, annotate inline advisor remarks "
+ "with LTO and pass information."));
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
@@ -514,8 +514,9 @@ void llvm::emitInlinedIntoBasedOnCost(
InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
Optional<InlineContext> IC)
: M(M), FAM(FAM), IC(IC),
- AnnotatedInlinePassName((IC && AnnotateInlinePhase) ? llvm::AnnotateInlinePassName(*IC)
- : DEBUG_TYPE) {
+ AnnotatedInlinePassName((IC && AnnotateInlinePhase)
+ ? llvm::AnnotateInlinePassName(*IC)
+ : DEBUG_TYPE) {
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
ImportedFunctionsStats =
std::make_unique<ImportedFunctionsInliningStatistics>();
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index e63497260e6e..9f8a5e472f01 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -131,6 +131,12 @@ static cl::opt<size_t>
cl::desc("Do not inline functions with a stack size "
"that exceeds the specified limit"));
+static cl::opt<size_t>
+ RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden,
+ cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
+ cl::desc("Do not inline recursive functions with a stack "
+ "size that exceeds the specified limit"));
+
static cl::opt<bool> OptComputeFullInlineCost(
"inline-cost-full", cl::Hidden,
cl::desc("Compute the full inline cost of a call site even when the cost "
@@ -702,7 +708,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
assert(BFI && "BFI must be available");
auto ProfileCount = BFI->getBlockProfileCount(BB);
assert(ProfileCount);
- if (ProfileCount.getValue() == 0)
+ if (ProfileCount.value() == 0)
ColdSize += Cost - CostAtBBStart;
}
@@ -827,7 +833,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
assert(ProfileCount);
- CurrentSavings *= ProfileCount.getValue();
+ CurrentSavings *= ProfileCount.value();
CycleSavings += CurrentSavings;
}
@@ -1781,12 +1787,12 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// return min(A, B) if B is valid.
auto MinIfValid = [](int A, Optional<int> B) {
- return B ? std::min(A, B.getValue()) : A;
+ return B ? std::min(A, B.value()) : A;
};
// return max(A, B) if B is valid.
auto MaxIfValid = [](int A, Optional<int> B) {
- return B ? std::max(A, B.getValue()) : A;
+ return B ? std::max(A, B.value()) : A;
};
// Various bonus percentages. These are multiplied by Threshold to get the
@@ -2444,8 +2450,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
// If the caller is a recursive function then we don't want to inline
// functions which allocate a lot of stack space because it would increase
// the caller stack usage dramatically.
- if (IsCallerRecursive &&
- AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
+ if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
auto IR =
InlineResult::failure("recursive and allocates too much stack space");
if (ORE)
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 013e4d6489fa..4691aebbdfe1 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4849,12 +4849,6 @@ static Value *simplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
return UndefValue::get(PN->getType());
if (HasUndefInput) {
- // We cannot start executing a trapping constant expression on more control
- // flow paths.
- auto *C = dyn_cast<Constant>(CommonValue);
- if (C && C->canTrap())
- return nullptr;
-
// If we have a PHI node like phi(X, undef, X), where X is defined by some
// instruction, we cannot return X as the result of the PHI node unless it
// dominates the PHI block.
@@ -6117,8 +6111,8 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
Value *Op2 = Call->getArgOperand(2);
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q,
- FPI->getExceptionBehavior().getValue(),
- FPI->getRoundingMode().getValue()))
+ FPI->getExceptionBehavior().value(),
+ FPI->getRoundingMode().value()))
return V;
return nullptr;
}
@@ -6182,38 +6176,33 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
case Intrinsic::experimental_constrained_fadd: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
- FPI->getFastMathFlags(), Q,
- FPI->getExceptionBehavior().getValue(),
- FPI->getRoundingMode().getValue());
+ return simplifyFAddInst(
+ FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+ Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
}
case Intrinsic::experimental_constrained_fsub: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
- FPI->getFastMathFlags(), Q,
- FPI->getExceptionBehavior().getValue(),
- FPI->getRoundingMode().getValue());
+ return simplifyFSubInst(
+ FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+ Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
}
case Intrinsic::experimental_constrained_fmul: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
- FPI->getFastMathFlags(), Q,
- FPI->getExceptionBehavior().getValue(),
- FPI->getRoundingMode().getValue());
+ return simplifyFMulInst(
+ FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+ Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
}
case Intrinsic::experimental_constrained_fdiv: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
- FPI->getFastMathFlags(), Q,
- FPI->getExceptionBehavior().getValue(),
- FPI->getRoundingMode().getValue());
+ return simplifyFDivInst(
+ FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+ Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
}
case Intrinsic::experimental_constrained_frem: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
- FPI->getFastMathFlags(), Q,
- FPI->getExceptionBehavior().getValue(),
- FPI->getRoundingMode().getValue());
+ return simplifyFRemInst(
+ FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
+ Q, FPI->getExceptionBehavior().value(), FPI->getRoundingMode().value());
}
default:
return nullptr;
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 8a8e9e923b7c..d49b20798c82 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -921,7 +921,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueCast(
if (!LHSRes)
// More work to do before applying this transfer rule.
return None;
- const ConstantRange &LHSRange = LHSRes.getValue();
+ const ConstantRange &LHSRange = LHSRes.value();
const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
@@ -946,8 +946,8 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
// More work to do before applying this transfer rule.
return None;
- const ConstantRange &LHSRange = LHSRes.getValue();
- const ConstantRange &RHSRange = RHSRes.getValue();
+ const ConstantRange &LHSRange = LHSRes.value();
+ const ConstantRange &RHSRange = RHSRes.value();
return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
}
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index bc1d82cf1480..938d950e6da7 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -405,7 +405,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment,
Instruction *ScanFrom,
const DominatorTree *DT,
const TargetLibraryInfo *TLI) {
- APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
+ TypeSize TySize = DL.getTypeStoreSize(Ty);
+ if (TySize.isScalable())
+ return false;
+ APInt Size(DL.getIndexTypeSizeInBits(V->getType()), TySize.getFixedValue());
return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT, TLI);
}
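The new guard matters because getTypeStoreSize() returns a TypeSize, and converting a scalable size (e.g. <vscale x 4 x i32>) to a fixed integer is invalid. A minimal sketch of the pattern, using a hypothetical helper:

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    static Optional<uint64_t> getFixedStoreSize(const DataLayout &DL, Type *Ty) {
      TypeSize TS = DL.getTypeStoreSize(Ty);
      if (TS.isScalable())
        return None;             // size only known as a multiple of vscale
      return TS.getFixedValue(); // safe: a compile-time constant
    }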
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 91501b04448e..f5b121c98ec4 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -501,10 +501,10 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I,
return None;
const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI);
if (AllocData)
- return mangledNameForMallocFamily(AllocData.getValue().Family);
+ return mangledNameForMallocFamily(AllocData.value().Family);
const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn);
if (FreeData)
- return mangledNameForMallocFamily(FreeData.getValue().Family);
+ return mangledNameForMallocFamily(FreeData.value().Family);
return None;
}
diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp
index 5cff986245b9..ad8322d7bd79 100644
--- a/llvm/lib/Analysis/MustExecute.cpp
+++ b/llvm/lib/Analysis/MustExecute.cpp
@@ -493,7 +493,7 @@ static V getOrCreateCachedOptional(K Key, DenseMap<K, Optional<V>> &Map,
Optional<V> &OptVal = Map[Key];
if (!OptVal)
OptVal = Fn(std::forward<ArgsTy>(args)...);
- return OptVal.getValue();
+ return OptVal.value();
}
const BasicBlock *
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 9d5fa6d0a41b..64844f534332 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -279,19 +279,19 @@ ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const {
}
bool ProfileSummaryInfo::hasHugeWorkingSetSize() const {
- return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue();
+ return HasHugeWorkingSetSize && HasHugeWorkingSetSize.value();
}
bool ProfileSummaryInfo::hasLargeWorkingSetSize() const {
- return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue();
+ return HasLargeWorkingSetSize && HasLargeWorkingSetSize.value();
}
bool ProfileSummaryInfo::isHotCount(uint64_t C) const {
- return HotCountThreshold && C >= HotCountThreshold.getValue();
+ return HotCountThreshold && C >= HotCountThreshold.value();
}
bool ProfileSummaryInfo::isColdCount(uint64_t C) const {
- return ColdCountThreshold && C <= ColdCountThreshold.getValue();
+ return ColdCountThreshold && C <= ColdCountThreshold.value();
}
template <bool isHot>
@@ -299,9 +299,9 @@ bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff,
uint64_t C) const {
auto CountThreshold = computeThreshold(PercentileCutoff);
if (isHot)
- return CountThreshold && C >= CountThreshold.getValue();
+ return CountThreshold && C >= CountThreshold.value();
else
- return CountThreshold && C <= CountThreshold.getValue();
+ return CountThreshold && C <= CountThreshold.value();
}
bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff,
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 207f4df79e45..f61806bd1dad 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2319,9 +2319,13 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
return A == B;
}
-std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
+Optional<SCEV::NoWrapFlags>
ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
const OverflowingBinaryOperator *OBO) {
+ // It cannot be done any better.
+ if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap())
+ return None;
+
SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap;
if (OBO->hasNoUnsignedWrap())
@@ -2331,13 +2335,10 @@ ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
bool Deduced = false;
- if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap())
- return {Flags, Deduced};
-
if (OBO->getOpcode() != Instruction::Add &&
OBO->getOpcode() != Instruction::Sub &&
OBO->getOpcode() != Instruction::Mul)
- return {Flags, Deduced};
+ return None;
const SCEV *LHS = getSCEV(OBO->getOperand(0));
const SCEV *RHS = getSCEV(OBO->getOperand(1));
@@ -2356,7 +2357,9 @@ ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
Deduced = true;
}
- return {Flags, Deduced};
+ if (Deduced)
+ return Flags;
+ return None;
}
// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
@@ -4835,7 +4838,7 @@ public:
Optional<const SCEV *> Res =
compareWithBackedgeCondition(SI->getCondition());
if (Res) {
- bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
+ bool IsOne = cast<SCEVConstant>(Res.value())->getValue()->isOne();
Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
}
break;
@@ -4843,7 +4846,7 @@ public:
default: {
Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
if (Res)
- Result = Res.getValue();
+ Result = Res.value();
break;
}
}
@@ -6583,8 +6586,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// Check if the IR explicitly contains !range metadata.
Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
if (MDRange)
- ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
- RangeType);
+ ConservativeResult =
+ ConservativeResult.intersectWith(MDRange.value(), RangeType);
// Use facts about recurrences in the underlying IR. Note that add
// recurrences are AddRecExprs and thus don't hit this path. This
@@ -7365,6 +7368,8 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
Ops.push_back(II->getArgOperand(1));
return nullptr;
case Intrinsic::start_loop_iterations:
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
Ops.push_back(II->getArgOperand(0));
return nullptr;
default:
@@ -7816,8 +7821,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return getAddExpr(ClampedX, Y, SCEV::FlagNUW);
}
case Intrinsic::start_loop_iterations:
- // A start_loop_iterations is just equivalent to the first operand for
- // SCEV purposes.
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+      // A start_loop_iterations or llvm.annotation or llvm.ptr.annotation is
+      // just equivalent to the first operand for SCEV purposes.
return getSCEV(II->getArgOperand(0));
default:
break;
@@ -9517,14 +9524,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
}
return C;
}
- case scUDivExpr: {
- const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
- if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
- if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
- if (LHS->getType() == RHS->getType())
- return ConstantExpr::getUDiv(LHS, RHS);
- return nullptr;
- }
+ case scUDivExpr:
case scSMaxExpr:
case scUMaxExpr:
case scSMinExpr:
@@ -10632,7 +10632,7 @@ ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
assert(ResultSwapped && "should be able to analyze both!");
- assert(ResultSwapped.getValue() != Result.getValue() &&
+ assert(ResultSwapped.value() != Result.value() &&
"monotonicity should flip as we flip the predicate");
}
#endif
@@ -11808,7 +11808,7 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
// Make sure L does not refer to a value from a potentially previous
// iteration of a loop.
- if (!properlyDominates(L, IncBB))
+ if (!properlyDominates(L, LBB))
return false;
if (!ProvedEasily(L, RHS))
return false;
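The signature change above replaces a {flags, deduced} pair with an Optional that is None unless something new was actually deduced. A hedged sketch of the resulting call-site shape (hypothetical caller, not from this patch):

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/Operator.h"

    using namespace llvm;

    static SCEV::NoWrapFlags strengthen(ScalarEvolution &SE,
                                        const OverflowingBinaryOperator *OBO) {
      SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
      // None replaces the old 'Deduced == false' case.
      if (Optional<SCEV::NoWrapFlags> S =
              SE.getStrengthenedNoWrapFlagsFromBinOp(OBO))
        Flags = ScalarEvolution::setFlags(Flags, *S);
      return Flags;
    }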
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
index 203858c1cf06..682fc095b0e9 100644
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/JSON.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -49,19 +48,17 @@ using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;
struct TFInitializer {
TFInitializer() {
- assert(!IsInitialized && "TFInitialized should be called only once");
int Argc = 1;
const char *Name = "";
const char **NamePtr = &Name;
TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
- IsInitialized = true;
}
- bool IsInitialized = false;
};
-llvm::ManagedStatic<TFInitializer> TFLibInitializer;
-
-bool ensureInitTF() { return TFLibInitializer->IsInitialized; }
+bool ensureInitTF() {
+ static TFInitializer TFLibInitializer;
+ return true;
+}
TFGraphPtr createTFGraph() {
return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
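The TFUtils change relies on C++11 "magic statics": a function-local static is constructed exactly once, thread-safely, on first call, which is what ManagedStatic emulated here. A standalone illustration in plain C++ (no LLVM API):

    #include <cstdio>

    struct LibInitializer {
      LibInitializer() { std::puts("initialized"); } // runs once, on first call
    };

    bool ensureInit() {
      static LibInitializer Init; // thread-safe one-time construction
      return true;
    }

    int main() {
      ensureInit(); // prints "initialized"
      ensureInit(); // prints nothing
    }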
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 66f61961d01b..6e34a8303c08 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -298,7 +298,7 @@ bool TargetTransformInfo::preferPredicateOverEpilogue(
return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
}
-bool TargetTransformInfo::emitGetActiveLaneMask() const {
+PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const {
return TTIImpl->emitGetActiveLaneMask();
}
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 9bcbe4a4cc1e..560f46d39d0d 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -303,24 +303,27 @@ public:
/// given offset. Update the offset to be relative to the field type.
TBAAStructTypeNode getField(uint64_t &Offset) const {
bool NewFormat = isNewFormat();
+ const ArrayRef<MDOperand> Operands(Node->op_begin(), Node->op_end());
+ const unsigned NumOperands = Operands.size();
+
if (NewFormat) {
// New-format root and scalar type nodes have no fields.
- if (Node->getNumOperands() < 6)
+ if (NumOperands < 6)
return TBAAStructTypeNode();
} else {
// Parent can be omitted for the root node.
- if (Node->getNumOperands() < 2)
+ if (NumOperands < 2)
return TBAAStructTypeNode();
// Fast path for a scalar type node and a struct type node with a single
// field.
- if (Node->getNumOperands() <= 3) {
- uint64_t Cur = Node->getNumOperands() == 2
- ? 0
- : mdconst::extract<ConstantInt>(Node->getOperand(2))
- ->getZExtValue();
+ if (NumOperands <= 3) {
+ uint64_t Cur =
+ NumOperands == 2
+ ? 0
+ : mdconst::extract<ConstantInt>(Operands[2])->getZExtValue();
Offset -= Cur;
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
if (!P)
return TBAAStructTypeNode();
return TBAAStructTypeNode(P);
@@ -332,10 +335,11 @@ public:
unsigned FirstFieldOpNo = NewFormat ? 3 : 1;
unsigned NumOpsPerField = NewFormat ? 3 : 2;
unsigned TheIdx = 0;
- for (unsigned Idx = FirstFieldOpNo; Idx < Node->getNumOperands();
+
+ for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
Idx += NumOpsPerField) {
- uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
- ->getZExtValue();
+ uint64_t Cur =
+ mdconst::extract<ConstantInt>(Operands[Idx + 1])->getZExtValue();
if (Cur > Offset) {
assert(Idx >= FirstFieldOpNo + NumOpsPerField &&
"TBAAStructTypeNode::getField should have an offset match!");
@@ -345,11 +349,11 @@ public:
}
// Move along the last field.
if (TheIdx == 0)
- TheIdx = Node->getNumOperands() - NumOpsPerField;
- uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
- ->getZExtValue();
+ TheIdx = NumOperands - NumOpsPerField;
+ uint64_t Cur =
+ mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
Offset -= Cur;
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
if (!P)
return TBAAStructTypeNode();
return TBAAStructTypeNode(P);
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 05d5e47bb8d7..add2d427e05b 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4679,27 +4679,22 @@ bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
F.hasFnAttribute(Attribute::SanitizeHWAddress);
}
-
-bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
const Instruction *CtxI,
const DominatorTree *DT,
const TargetLibraryInfo *TLI) {
- const Operator *Inst = dyn_cast<Operator>(V);
- if (!Inst)
- return false;
- return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI);
+ return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
+ DT, TLI);
}
-bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode,
- const Operator *Inst,
- const Instruction *CtxI,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI) {
+bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
+ unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
+ const DominatorTree *DT, const TargetLibraryInfo *TLI) {
#ifndef NDEBUG
if (Inst->getOpcode() != Opcode) {
// Check that the operands are actually compatible with the Opcode override.
auto hasEqualReturnAndLeadingOperandTypes =
- [](const Operator *Inst, unsigned NumLeadingOperands) {
+ [](const Instruction *Inst, unsigned NumLeadingOperands) {
if (Inst->getNumOperands() < NumLeadingOperands)
return false;
const Type *ExpectedType = Inst->getType();
@@ -4715,11 +4710,6 @@ bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode,
}
#endif
- for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
- if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
- if (C->canTrap())
- return false;
-
switch (Opcode) {
default:
return true;
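Because isSafeToSpeculativelyExecute() now takes an Instruction rather than any Value, the dyn_cast that used to live inside it moves to the call sites. A sketch of the migration at a hypothetical caller:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    static bool canSpeculate(const Value *V) {
      const auto *I = dyn_cast<Instruction>(V); // previously done internally
      return I && isSafeToSpeculativelyExecute(I);
    }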
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index f863a1ffad3a..894680cda1fc 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1502,7 +1502,7 @@ void VFABI::getVectorVariantNames(
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
assert(Info && "Invalid name for a VFABI variant.");
- assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
+ assert(CI.getModule()->getFunction(Info.value().VectorName) &&
"Vector function is missing.");
#endif
VariantMappings.push_back(std::string(S));
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 30e6f8599208..c9a982693fa7 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -582,7 +582,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(no_sanitize_address);
KEYWORD(no_sanitize_hwaddress);
- KEYWORD(no_sanitize_memtag);
KEYWORD(sanitize_address_dyninit);
KEYWORD(ccc);
@@ -661,7 +660,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
- KEYWORD(umin);
+ KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
KEYWORD(vscale);
KEYWORD(x);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index a1cdeac2b47f..fd502eded0a0 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -456,10 +456,15 @@ bool LLParser::parseTargetDefinition() {
return false;
case lltok::kw_datalayout:
Lex.Lex();
- if (parseToken(lltok::equal, "expected '=' after target datalayout") ||
- parseStringConstant(Str))
+ if (parseToken(lltok::equal, "expected '=' after target datalayout"))
+ return true;
+ LocTy Loc = Lex.getLoc();
+ if (parseStringConstant(Str))
return true;
- M->setDataLayout(Str);
+ Expected<DataLayout> MaybeDL = DataLayout::parse(Str);
+ if (!MaybeDL)
+ return error(Loc, toString(MaybeDL.takeError()));
+ M->setDataLayout(MaybeDL.get());
return false;
}
}
@@ -1107,7 +1112,7 @@ static bool isSanitizer(lltok::Kind Kind) {
switch (Kind) {
case lltok::kw_no_sanitize_address:
case lltok::kw_no_sanitize_hwaddress:
- case lltok::kw_no_sanitize_memtag:
+ case lltok::kw_sanitize_memtag:
case lltok::kw_sanitize_address_dyninit:
return true;
default:
@@ -1128,8 +1133,8 @@ bool LLParser::parseSanitizer(GlobalVariable *GV) {
case lltok::kw_no_sanitize_hwaddress:
Meta.NoHWAddress = true;
break;
- case lltok::kw_no_sanitize_memtag:
- Meta.NoMemtag = true;
+ case lltok::kw_sanitize_memtag:
+ Meta.Memtag = true;
break;
case lltok::kw_sanitize_address_dyninit:
Meta.IsDynInit = true;
@@ -3474,32 +3479,26 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
}
case lltok::kw_extractvalue:
return error(ID.Loc, "extractvalue constexprs are no longer supported");
- case lltok::kw_insertvalue: {
- Lex.Lex();
- Constant *Val0, *Val1;
- SmallVector<unsigned, 4> Indices;
- if (parseToken(lltok::lparen, "expected '(' in insertvalue constantexpr") ||
- parseGlobalTypeAndValue(Val0) ||
- parseToken(lltok::comma,
- "expected comma in insertvalue constantexpr") ||
- parseGlobalTypeAndValue(Val1) || parseIndexList(Indices) ||
- parseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
- return true;
- if (!Val0->getType()->isAggregateType())
- return error(ID.Loc, "insertvalue operand must be aggregate type");
- Type *IndexedType =
- ExtractValueInst::getIndexedType(Val0->getType(), Indices);
- if (!IndexedType)
- return error(ID.Loc, "invalid indices for insertvalue");
- if (IndexedType != Val1->getType())
- return error(ID.Loc, "insertvalue operand and field disagree in type: '" +
- getTypeString(Val1->getType()) +
- "' instead of '" + getTypeString(IndexedType) +
- "'");
- ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices);
- ID.Kind = ValID::t_Constant;
- return false;
- }
+ case lltok::kw_insertvalue:
+ return error(ID.Loc, "insertvalue constexprs are no longer supported");
+ case lltok::kw_udiv:
+ return error(ID.Loc, "udiv constexprs are no longer supported");
+ case lltok::kw_sdiv:
+ return error(ID.Loc, "sdiv constexprs are no longer supported");
+ case lltok::kw_urem:
+ return error(ID.Loc, "urem constexprs are no longer supported");
+ case lltok::kw_srem:
+ return error(ID.Loc, "srem constexprs are no longer supported");
+ case lltok::kw_fadd:
+ return error(ID.Loc, "fadd constexprs are no longer supported");
+ case lltok::kw_fsub:
+ return error(ID.Loc, "fsub constexprs are no longer supported");
+ case lltok::kw_fmul:
+ return error(ID.Loc, "fmul constexprs are no longer supported");
+ case lltok::kw_fdiv:
+ return error(ID.Loc, "fdiv constexprs are no longer supported");
+ case lltok::kw_frem:
+ return error(ID.Loc, "frem constexprs are no longer supported");
case lltok::kw_icmp:
case lltok::kw_fcmp: {
unsigned PredVal, Opc = Lex.getUIntVal();
@@ -3559,17 +3558,8 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
}
// Binary Operators.
case lltok::kw_add:
- case lltok::kw_fadd:
case lltok::kw_sub:
- case lltok::kw_fsub:
case lltok::kw_mul:
- case lltok::kw_fmul:
- case lltok::kw_udiv:
- case lltok::kw_sdiv:
- case lltok::kw_fdiv:
- case lltok::kw_urem:
- case lltok::kw_srem:
- case lltok::kw_frem:
case lltok::kw_shl:
case lltok::kw_lshr:
case lltok::kw_ashr: {
@@ -5398,8 +5388,10 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
V = PFS->getVal(ID.StrVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_InlineAsm: {
- if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2))
+ if (!ID.FTy)
return error(ID.Loc, "invalid type for inline asm constraint string");
+ if (Error Err = InlineAsm::verify(ID.FTy, ID.StrVal2))
+ return error(ID.Loc, toString(std::move(Err)));
V = InlineAsm::get(
ID.FTy, ID.StrVal, ID.StrVal2, ID.UIntVal & 1, (ID.UIntVal >> 1) & 1,
InlineAsm::AsmDialect((ID.UIntVal >> 2) & 1), (ID.UIntVal >> 3) & 1);
@@ -7483,6 +7475,14 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
Operation = AtomicRMWInst::FSub;
IsFP = true;
break;
+ case lltok::kw_fmax:
+ Operation = AtomicRMWInst::FMax;
+ IsFP = true;
+ break;
+ case lltok::kw_fmin:
+ Operation = AtomicRMWInst::FMin;
+ IsFP = true;
+ break;
}
Lex.Lex(); // Eat the operation.
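The two new cases let the parser accept atomicrmw fmax/fmin, the textual form of the floating-point min/max read-modify-write operations threaded through the rest of this commit. A hedged sketch of building the same instruction from C++, assuming the usual IRBuilder API:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"

    // Emits: %old = atomicrmw fmax ptr %Ptr, float %Val seq_cst
    static llvm::Value *emitAtomicFMax(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                                       llvm::Value *Val) {
      return B.CreateAtomicRMW(llvm::AtomicRMWInst::FMax, Ptr, Val,
                               llvm::MaybeAlign(),
                               llvm::AtomicOrdering::SequentiallyConsistent);
    }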
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 93b07fc0db30..8d5a2555f9af 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -69,7 +69,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@@ -1243,6 +1242,8 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
case bitc::RMW_FADD: return AtomicRMWInst::FAdd;
case bitc::RMW_FSUB: return AtomicRMWInst::FSub;
+ case bitc::RMW_FMAX: return AtomicRMWInst::FMax;
+ case bitc::RMW_FMIN: return AtomicRMWInst::FMin;
}
}
@@ -1384,6 +1385,9 @@ static bool isConstExprSupported(uint8_t Opcode) {
if (Opcode >= BitcodeConstant::FirstSpecialOpcode)
return true;
+ if (Instruction::isBinaryOp(Opcode))
+ return ConstantExpr::isSupportedBinOp(Opcode);
+
return !ExpandConstantExprs;
}
@@ -1851,6 +1855,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::DisableSanitizerInstrumentation;
case bitc::ATTR_KIND_ELEMENTTYPE:
return Attribute::ElementType;
+ case bitc::ATTR_KIND_FNRETTHUNK_EXTERN:
+ return Attribute::FnRetThunkExtern;
case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY:
return Attribute::InaccessibleMemOnly;
case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY:
@@ -3672,7 +3678,7 @@ GlobalValue::SanitizerMetadata deserializeSanitizerMetadata(unsigned V) {
if (V & (1 << 1))
Meta.NoHWAddress = true;
if (V & (1 << 2))
- Meta.NoMemtag = true;
+ Meta.Memtag = true;
if (V & (1 << 3))
Meta.IsDynInit = true;
return Meta;
@@ -7441,10 +7447,9 @@ class BitcodeErrorCategoryType : public std::error_category {
} // end anonymous namespace
-static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory;
-
const std::error_category &llvm::BitcodeErrorCategory() {
- return *ErrorCategory;
+ static BitcodeErrorCategoryType ErrorCategory;
+ return ErrorCategory;
}
static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream,
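This is part of the tree-wide ManagedStatic removal: a function-local static gives the same lazy, thread-safe construction (guaranteed since C++11) without registering with llvm_shutdown(). The pattern in isolation, as a sketch with a stand-in category:

    #include <string>
    #include <system_error>

    namespace {
    class DemoErrorCategory : public std::error_category {
      const char *name() const noexcept override { return "demo"; }
      std::string message(int) const override { return "demo error"; }
    };
    } // namespace

    const std::error_category &demoErrorCategory() {
      static DemoErrorCategory Category; // constructed once, on first use
      return Category;
    }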
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 941ed808bab1..590562ce2796 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -577,6 +577,8 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::UMin: return bitc::RMW_UMIN;
case AtomicRMWInst::FAdd: return bitc::RMW_FADD;
case AtomicRMWInst::FSub: return bitc::RMW_FSUB;
+ case AtomicRMWInst::FMax: return bitc::RMW_FMAX;
+ case AtomicRMWInst::FMin: return bitc::RMW_FMIN;
}
}
@@ -632,6 +634,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_COLD;
case Attribute::DisableSanitizerInstrumentation:
return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION;
+ case Attribute::FnRetThunkExtern:
+ return bitc::ATTR_KIND_FNRETTHUNK_EXTERN;
case Attribute::Hot:
return bitc::ATTR_KIND_HOT;
case Attribute::ElementType:
@@ -1230,7 +1234,7 @@ static_assert(sizeof(GlobalValue::SanitizerMetadata) <= sizeof(unsigned),
static unsigned
serializeSanitizerMetadata(const GlobalValue::SanitizerMetadata &Meta) {
return Meta.NoAddress | (Meta.NoHWAddress << 1) |
- (Meta.NoMemtag << 2) | (Meta.IsDynInit << 3);
+ (Meta.Memtag << 2) | (Meta.IsDynInit << 3);
}
/// Emit top-level description of module, including target triple, inline asm,
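serializeSanitizerMetadata and the reader's deserializeSanitizerMetadata must agree on bit positions (address = 0, hwaddress = 1, memtag = 2, dyninit = 3), which is why the NoMemtag to Memtag rename touches both sides in the same commit. A standalone round-trip sketch over a hypothetical mirror of the struct:

    #include <cassert>

    struct SanMeta { // stand-in for GlobalValue::SanitizerMetadata
      unsigned NoAddress : 1, NoHWAddress : 1, Memtag : 1, IsDynInit : 1;
    };

    static unsigned serialize(const SanMeta &M) {
      return M.NoAddress | (M.NoHWAddress << 1) | (M.Memtag << 2) |
             (M.IsDynInit << 3);
    }

    static SanMeta deserialize(unsigned V) {
      SanMeta M{};
      M.NoAddress = V & 1;
      M.NoHWAddress = (V >> 1) & 1;
      M.Memtag = (V >> 2) & 1;
      M.IsDynInit = (V >> 3) & 1;
      return M;
    }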
@@ -2674,9 +2678,6 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(VE.getValueID(C->getOperand(1)));
Record.push_back(CE->getPredicate());
break;
- case Instruction::InsertValue:
- report_fatal_error("insertvalue constexprs not supported");
- break;
}
} else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
Code = bitc::CST_CODE_BLOCKADDRESS;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4a31bf85446b..94612a51d2e1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1816,6 +1816,11 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
assert(MAI->hasVisibilityOnlyWithLinkage() &&
"Visibility should be handled with emitLinkage() on AIX.");
+
+ // Linkage for an alias of a global variable has already been emitted.
+ if (isa<GlobalVariable>(GA.getAliaseeObject()))
+ return;
+
emitLinkage(&GA, Name);
// If it's a function, also emit linkage for aliases of function entry
// point.
@@ -2860,7 +2865,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
AsmPrinter &AP,
const Constant *BaseCV = nullptr,
- uint64_t Offset = 0);
+ uint64_t Offset = 0,
+ AsmPrinter::AliasMapTy *AliasList = nullptr);
static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP);
@@ -2914,9 +2920,21 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
return -1;
}
-static void emitGlobalConstantDataSequential(const DataLayout &DL,
- const ConstantDataSequential *CDS,
- AsmPrinter &AP) {
+static void emitGlobalAliasInline(AsmPrinter &AP, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ if (AliasList) {
+ auto AliasIt = AliasList->find(Offset);
+ if (AliasIt != AliasList->end()) {
+ for (const GlobalAlias *GA : AliasIt->second)
+ AP.OutStreamer->emitLabel(AP.getSymbol(GA));
+ AliasList->erase(Offset);
+ }
+ }
+}
+
+static void emitGlobalConstantDataSequential(
+ const DataLayout &DL, const ConstantDataSequential *CDS, AsmPrinter &AP,
+ AsmPrinter::AliasMapTy *AliasList) {
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, DL);
if (Value != -1) {
@@ -2933,17 +2951,20 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
// Otherwise, emit the values in successive locations.
unsigned ElementByteSize = CDS->getElementByteSize();
if (isa<IntegerType>(CDS->getElementType())) {
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
if (AP.isVerbose())
AP.OutStreamer->getCommentOS()
- << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i));
- AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i),
+ << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(I));
+ AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(I),
ElementByteSize);
}
} else {
Type *ET = CDS->getElementType();
- for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP);
+ }
}
unsigned Size = DL.getTypeAllocSize(CDS->getType());
@@ -2956,7 +2977,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
static void emitGlobalConstantArray(const DataLayout &DL,
const ConstantArray *CA, AsmPrinter &AP,
- const Constant *BaseCV, uint64_t Offset) {
+ const Constant *BaseCV, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
int Value = isRepeatedByteSequence(CA, DL);
@@ -2964,44 +2986,75 @@ static void emitGlobalConstantArray(const DataLayout &DL,
if (Value != -1) {
uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
AP.OutStreamer->emitFill(Bytes, Value);
- }
- else {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
- emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset);
- Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+ } else {
+ for (unsigned I = 0, E = CA->getNumOperands(); I != E; ++I) {
+ emitGlobalConstantImpl(DL, CA->getOperand(I), AP, BaseCV, Offset,
+ AliasList);
+ Offset += DL.getTypeAllocSize(CA->getOperand(I)->getType());
}
}
}
+static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP);
+
static void emitGlobalConstantVector(const DataLayout &DL,
- const ConstantVector *CV, AsmPrinter &AP) {
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- emitGlobalConstantImpl(DL, CV->getOperand(i), AP);
+ const ConstantVector *CV, AsmPrinter &AP,
+ AsmPrinter::AliasMapTy *AliasList) {
+ Type *ElementType = CV->getType()->getElementType();
+ uint64_t ElementSizeInBits = DL.getTypeSizeInBits(ElementType);
+ uint64_t ElementAllocSizeInBits = DL.getTypeAllocSizeInBits(ElementType);
+ uint64_t EmittedSize;
+ if (ElementSizeInBits != ElementAllocSizeInBits) {
+ // If the allocation size of an element is different from the size in bits,
+ // printing each element separately will insert incorrect padding.
+ //
+ // The general algorithm here is complicated; instead of writing it out
+ // here, just use the existing code in ConstantFolding.
+ Type *IntT =
+ IntegerType::get(CV->getContext(), DL.getTypeSizeInBits(CV->getType()));
+ ConstantInt *CI = dyn_cast_or_null<ConstantInt>(ConstantFoldConstant(
+ ConstantExpr::getBitCast(const_cast<ConstantVector *>(CV), IntT), DL));
+ if (!CI) {
+ report_fatal_error(
+ "Cannot lower vector global with unusual element type");
+ }
+ emitGlobalAliasInline(AP, 0, AliasList);
+ emitGlobalConstantLargeInt(CI, AP);
+ EmittedSize = DL.getTypeStoreSize(CV->getType());
+ } else {
+ for (unsigned I = 0, E = CV->getType()->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, DL.getTypeAllocSize(CV->getType()) * I, AliasList);
+ emitGlobalConstantImpl(DL, CV->getOperand(I), AP);
+ }
+ EmittedSize =
+ DL.getTypeAllocSize(ElementType) * CV->getType()->getNumElements();
+ }
unsigned Size = DL.getTypeAllocSize(CV->getType());
- unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
- CV->getType()->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer->emitZeros(Padding);
}
static void emitGlobalConstantStruct(const DataLayout &DL,
const ConstantStruct *CS, AsmPrinter &AP,
- const Constant *BaseCV, uint64_t Offset) {
+ const Constant *BaseCV, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
// Print the fields in successive locations. Pad to align if needed!
unsigned Size = DL.getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL.getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
- for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
- const Constant *Field = CS->getOperand(i);
+ for (unsigned I = 0, E = CS->getNumOperands(); I != E; ++I) {
+ const Constant *Field = CS->getOperand(I);
// Print the actual field value.
- emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar);
+ emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar,
+ AliasList);
// Check if padding is needed and insert one or more 0s.
uint64_t FieldSize = DL.getTypeAllocSize(Field->getType());
- uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- - Layout->getElementOffset(i)) - FieldSize;
+ uint64_t PadSize = ((I == E - 1 ? Size : Layout->getElementOffset(I + 1)) -
+ Layout->getElementOffset(I)) -
+ FieldSize;
SizeSoFar += FieldSize + PadSize;
// Insert padding - this may include padding to increase the size of the
@@ -3211,7 +3264,9 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
AsmPrinter &AP, const Constant *BaseCV,
- uint64_t Offset) {
+ uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ emitGlobalAliasInline(AP, Offset, AliasList);
uint64_t Size = DL.getTypeAllocSize(CV->getType());
// Globals with sub-elements such as combinations of arrays and structs
@@ -3251,13 +3306,13 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
}
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
- return emitGlobalConstantDataSequential(DL, CDS, AP);
+ return emitGlobalConstantDataSequential(DL, CDS, AP, AliasList);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset);
+ return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset, AliasList);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset);
+ return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset, AliasList);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@@ -3276,7 +3331,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
}
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return emitGlobalConstantVector(DL, V, AP);
+ return emitGlobalConstantVector(DL, V, AP, AliasList);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
@@ -3292,15 +3347,21 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV) {
+void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV,
+ AliasMapTy *AliasList) {
uint64_t Size = DL.getTypeAllocSize(CV->getType());
if (Size)
- emitGlobalConstantImpl(DL, CV, *this);
+ emitGlobalConstantImpl(DL, CV, *this, nullptr, 0, AliasList);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
OutStreamer->emitIntValue(0, 1);
}
+ if (!AliasList)
+ return;
+ for (const auto &AliasPair : *AliasList)
+ report_fatal_error("Aliases with offset " + Twine(AliasPair.first) +
+ " were not emitted.");
}
void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
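The AliasMapTy parameter threaded through the emitters above maps byte offsets within a global's initializer to the aliases whose labels belong at those offsets; emitGlobalAliasInline drains entries as each offset is reached, so anything left afterwards was unreachable, hence the fatal error. A sketch of the map's shape and population, treating the exact element types and inline size as assumptions:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallVector.h"
    #include <cstdint>

    namespace llvm { class GlobalAlias; }

    using AliasMap =
        llvm::DenseMap<uint64_t,
                       llvm::SmallVector<const llvm::GlobalAlias *, 1>>;

    // Record that GA's label must be emitted Offset bytes into the global.
    static void addAliasAt(AliasMap &Map, uint64_t Offset,
                           const llvm::GlobalAlias *GA) {
      Map[Offset].push_back(GA);
    }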
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
index 28a02390fccb..c872d0dd2dfa 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
@@ -51,5 +51,5 @@ HANDLE_DIE_HASH_ATTR(DW_AT_virtuality)
HANDLE_DIE_HASH_ATTR(DW_AT_visibility)
HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location)
HANDLE_DIE_HASH_ATTR(DW_AT_type)
-
+HANDLE_DIE_HASH_ATTR(DW_AT_linkage_name)
#undef HANDLE_DIE_HASH_ATTR
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 5ce6fbb5f647..ad9dc517539a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1646,6 +1646,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
// No atomic libcalls are available for max/min/umax/umin.
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 7883a48d121c..59932a542bbc 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -120,8 +120,7 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) {
// Fallback to whether the intrinsic is speculatable.
Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
- return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
- cast<Operator>(&VPI));
+ return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI);
}
//// } Helpers
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 081c8b125f17..b06043fb4c31 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -500,6 +500,12 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
LLT DstTy = MRI.getType(DstRegs[0]);
LLT LCMTy = getCoverTy(SrcTy, PartTy);
+ if (PartTy.isVector() && LCMTy == PartTy) {
+ assert(DstRegs.size() == 1);
+ B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg);
+ return;
+ }
+
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
unsigned CoveringSize = LCMTy.getSizeInBits();
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2c94f87804ac..ad0c0c8315dc 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -697,14 +697,16 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
return false;
Register SrcReg = MI.getOperand(1).getReg();
- GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
- if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
- !LoadMI->isSimple())
+ // Don't use getOpcodeDef() here since intermediate instructions may have
+ // multiple users.
+ GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
+ if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
return false;
Register LoadReg = LoadMI->getDstReg();
- LLT LoadTy = MRI.getType(LoadReg);
+ LLT RegTy = MRI.getType(LoadReg);
Register PtrReg = LoadMI->getPointerReg();
+ unsigned RegSize = RegTy.getSizeInBits();
uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
unsigned MaskSizeBits = MaskVal.countTrailingOnes();
@@ -715,7 +717,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
// If the mask covers the whole destination register, there's nothing to
// extend
- if (MaskSizeBits >= LoadTy.getSizeInBits())
+ if (MaskSizeBits >= RegSize)
return false;
// Most targets cannot deal with loads of size < 8 and need to re-legalize to
@@ -725,17 +727,26 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
const MachineMemOperand &MMO = LoadMI->getMMO();
LegalityQuery::MemDesc MemDesc(MMO);
- MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+
+ // Don't modify the memory access size if this is atomic/volatile, but we can
+ // still adjust the opcode to indicate the high bit behavior.
+ if (LoadMI->isSimple())
+ MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+ else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+ return false;
+
+ // TODO: Could check if it's legal with the reduced or original memory size.
if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+ {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
B.setInstrAndDebugLoc(*LoadMI);
auto &MF = B.getMF();
auto PtrInfo = MMO.getPointerInfo();
- auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+ LoadMI->eraseFromParent();
};
return true;
}
@@ -805,21 +816,24 @@ bool CombinerHelper::matchSextInRegOfLoad(
MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT RegTy = MRI.getType(DstReg);
+
// Only supports scalars for now.
- if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ if (RegTy.isVector())
return false;
Register SrcReg = MI.getOperand(1).getReg();
auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
- if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
- !LoadDef->isSimple())
+ if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
return false;
+ uint64_t MemBits = LoadDef->getMemSizeInBits();
+
// If the sign extend extends from a narrower width than the load's width,
// then we can narrow the load width when we combine to a G_SEXTLOAD.
// Avoid widening the load at all.
- unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
- LoadDef->getMemSizeInBits());
+ unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
// Don't generate G_SEXTLOADs with a < 1 byte width.
if (NewSizeBits < 8)
@@ -831,7 +845,15 @@ bool CombinerHelper::matchSextInRegOfLoad(
const MachineMemOperand &MMO = LoadDef->getMMO();
LegalityQuery::MemDesc MMDesc(MMO);
- MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+
+ // Don't modify the memory access size if this is atomic/volatile, but we can
+ // still adjust the opcode to indicate the high bit behavior.
+ if (LoadDef->isSimple())
+ MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+ else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
+ return false;
+
+ // TODO: Could check if it's legal with the reduced or original memory size.
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
{MRI.getType(LoadDef->getDstReg()),
MRI.getType(LoadDef->getPointerReg())},
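Both combines in this file share the new guard: a simple (non-atomic, non-volatile) load may have its access width shrunk to the bits actually used, while an atomic or volatile load may only be re-labelled as zero- or sign-extending at its existing width, and only if that width does not already fill the whole register (otherwise there are no high bits for the extension to define). The decision logic distilled into standalone form, with hypothetical names:

    #include <cstdint>
    #include <optional>

    // Returns the memory size in bits for the combined extending load,
    // or nullopt if the combine must be rejected.
    static std::optional<uint64_t>
    chooseMemSizeBits(bool IsSimple, uint64_t LoadSizeBits, uint64_t UsedBits,
                      uint64_t RegSizeBits) {
      if (IsSimple)
        return UsedBits; // safe to narrow the access itself
      // Atomic/volatile: keep the width; bail out unless the access already
      // loads no more than the used bits and leaves high bits to define.
      if (LoadSizeBits > UsedBits || LoadSizeBits == RegSizeBits)
        return std::nullopt;
      return LoadSizeBits;
    }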
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index a2af66d28f4a..947facc87b71 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2076,9 +2076,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
case Intrinsic::stackprotector: {
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
- Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
- getStackGuard(GuardVal, MIRBuilder);
+ Register GuardVal;
+ if (TLI.useLoadStackGuardNode()) {
+ GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+ getStackGuard(GuardVal, MIRBuilder);
+ } else
+ GuardVal = getOrCreateVReg(*CI.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
int FI = getOrCreateFrameIndex(*Slot);
@@ -2883,6 +2888,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
case AtomicRMWInst::FSub:
Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
break;
+ case AtomicRMWInst::FMax:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMAX;
+ break;
+ case AtomicRMWInst::FMin:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
+ break;
}
MIRBuilder.buildAtomicRMW(
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 19ebf46191a9..0d9580e25606 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -473,6 +473,23 @@ MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
return buildInstr(ExtOp, Res, Op);
}
+MachineInstrBuilder MachineIRBuilder::buildBoolExtInReg(const DstOp &Res,
+ const SrcOp &Op,
+ bool IsVector,
+ bool IsFP) {
+ const auto *TLI = getMF().getSubtarget().getTargetLowering();
+ switch (TLI->getBooleanContents(IsVector, IsFP)) {
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ return buildSExtInReg(Res, Op, 1);
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ return buildZExtInReg(Res, Op, 1);
+ case TargetLoweringBase::UndefinedBooleanContent:
+ return buildCopy(Res, Op);
+ }
+
+ llvm_unreachable("unexpected BooleanContent");
+}
+
MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
const DstOp &Res,
const SrcOp &Op) {
@@ -938,6 +955,20 @@ MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr,
}
MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMax(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAX, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMIN, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
return buildInstr(TargetOpcode::G_FENCE)
.addImm(Ordering)
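buildBoolExtInReg complements buildBoolExt: given a register whose low bit holds a boolean, it re-canonicalizes the value in place according to the target's getBooleanContents() convention (sign-extend, zero-extend, or plain copy). A hedged usage sketch, with illustrative names:

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

    // Canonicalize the boolean in Src's low bit into Dst, for a scalar
    // integer context.
    static void canonicalizeBool(llvm::MachineIRBuilder &B, llvm::Register Dst,
                                 llvm::Register Src) {
      B.buildBoolExtInReg(Dst, Src, /*IsVector=*/false, /*IsFP=*/false);
    }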
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index b3f38a3b53f3..55f3ad796291 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -310,10 +310,11 @@ bool InterleavedAccess::lowerInterleavedLoad(
Extracts.push_back(Extract);
continue;
}
- auto *BI = dyn_cast<BinaryOperator>(User);
- if (BI && BI->hasOneUse()) {
- if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) {
- BinOpShuffles.insert(SVI);
+ if (auto *BI = dyn_cast<BinaryOperator>(User)) {
+ if (all_of(BI->users(),
+ [](auto *U) { return isa<ShuffleVectorInst>(U); })) {
+ for (auto *SVI : BI->users())
+ BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
continue;
}
}
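The rewrite replaces the old single-use check with a stronger but more general condition: a binary operator feeding the interleaved load now qualifies whenever every one of its users is a shufflevector. The llvm::all_of idiom in isolation:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/Instructions.h"

    // True if every user of I is a shufflevector instruction.
    static bool allUsersAreShuffles(const llvm::Instruction &I) {
      return llvm::all_of(I.users(), [](const llvm::User *U) {
        return llvm::isa<llvm::ShuffleVectorInst>(U);
      });
    }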
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 30ca8bd871e8..43c12c67939e 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -536,6 +536,17 @@ public:
// What was the old variable value?
ValueIDNum OldValue = VarLocs[MLoc.asU64()];
+ clobberMloc(MLoc, OldValue, Pos, MakeUndef);
+ }
+ /// Overload that takes an explicit value \p OldValue for when the value in
+ /// \p MLoc has changed and the TransferTracker's locations have not been
+ /// updated yet.
+ void clobberMloc(LocIdx MLoc, ValueIDNum OldValue,
+ MachineBasicBlock::iterator Pos, bool MakeUndef = true) {
+ auto ActiveMLocIt = ActiveMLocs.find(MLoc);
+ if (ActiveMLocIt == ActiveMLocs.end())
+ return;
+
VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
// Examine the remaining variable locations: if we can find the same value
@@ -1730,9 +1741,35 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
if (EmulateOldLDV && !SrcRegOp->isKill())
return false;
+ // Before we update MTracker, remember which values were present in each of
+ // the locations about to be overwritten, so that we can recover any
+ // potentially clobbered variables.
+ DenseMap<LocIdx, ValueIDNum> ClobberedLocs;
+ if (TTracker) {
+ for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
+ LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
+ auto MLocIt = TTracker->ActiveMLocs.find(ClobberedLoc);
+ // If ActiveMLocs isn't tracking this location or there are no variables
+ // using it, don't bother remembering.
+ if (MLocIt == TTracker->ActiveMLocs.end() || MLocIt->second.empty())
+ continue;
+ ValueIDNum Value = MTracker->readReg(*RAI);
+ ClobberedLocs[ClobberedLoc] = Value;
+ }
+ }
+
// Copy MTracker info, including subregs if available.
InstrRefBasedLDV::performCopy(SrcReg, DestReg);
+ // The copy might have clobbered variables based on the destination register.
+ // Tell TTracker about it, passing the old ValueIDNum to search for
+ // alternative locations (or else terminating those variables).
+ if (TTracker) {
+ for (auto LocVal : ClobberedLocs) {
+ TTracker->clobberMloc(LocVal.first, LocVal.second, MI.getIterator(), false);
+ }
+ }
+
// Only produce a transfer of DBG_VALUE within a block where old LDV
// would have. We might make use of the additional value tracking in some
// other way, later.
@@ -1744,15 +1781,6 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
if (EmulateOldLDV && SrcReg != DestReg)
MTracker->defReg(SrcReg, CurBB, CurInst);
- // Finally, the copy might have clobbered variables based on the destination
- // register. Tell TTracker about it, in case a backup location exists.
- if (TTracker) {
- for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
- LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
- TTracker->clobberMloc(ClobberedLoc, MI.getIterator(), false);
- }
- }
-
return true;
}
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 7d825a8bf853..1242ce20b732 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -1049,12 +1049,17 @@ public:
// we may end up with a main range not covering all subranges.
// This is an extremely rare case, so let's check and reconstruct the
// main range.
- for (LiveInterval::SubRange &S : LI.subranges()) {
- if (LI.covers(S))
- continue;
- LI.clear();
- LIS.constructMainRangeFromSubranges(LI);
- break;
+ if (LI.hasSubRanges()) {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none() || LI.covers(S))
+ continue;
+ LI.clear();
+ LIS.constructMainRangeFromSubranges(LI);
+ break;
+ }
}
continue;
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 40ae7053ea09..0c94e1f7e474 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -742,7 +742,7 @@ bool MIParser::parseBasicBlockDefinition(
MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
if (SectionID) {
- MBB->setSectionID(SectionID.getValue());
+ MBB->setSectionID(SectionID.value());
MF.setBBSectionsType(BasicBlockSection::List);
}
return false;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c186d0ba9969..02c44fa85cd9 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -451,7 +451,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (IrrLoopHeaderWeight && IsStandalone) {
if (Indexes) OS << '\t';
OS.indent(2) << "; Irreducible loop header weight: "
- << IrrLoopHeaderWeight.getValue() << '\n';
+ << IrrLoopHeaderWeight.value() << '\n';
}
}
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 81c97ba6a086..867a7ed584b2 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -106,8 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// We don't want to proceed further for cold functions
// or functions of unknown hotness. Lukewarm functions have no prefix.
Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
- if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") ||
- SectionPrefix.getValue().equals("unknown"))) {
+ if (SectionPrefix && (SectionPrefix.value().equals("unlikely") ||
+ SectionPrefix.value().equals("unknown"))) {
return false;
}
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4e00a211713e..5f80445a5a34 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -93,8 +93,11 @@ cl::opt<bool> VerifyScheduling(
cl::opt<bool> ViewMISchedDAGs(
"view-misched-dags", cl::Hidden,
cl::desc("Pop up a window to show MISched dags after they are processed"));
+cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
#else
const bool ViewMISchedDAGs = false;
+const bool PrintDAGs = false;
#endif // NDEBUG
} // end namespace llvm
@@ -112,10 +115,6 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
cl::desc("Only schedule this MBB#"));
-static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
- cl::desc("Print schedule DAGs"));
-#else
-static const bool PrintDAGs = false;
#endif // NDEBUG
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index db04f2bcc095..7a008bae726e 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -293,6 +293,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addUsedIfAvailable<LiveStacks>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 3245d9649be1..581168b31384 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -1448,7 +1448,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
const TargetRegisterClass *RC) {
// If the init register is not undef, try and find an existing phi.
if (InitReg) {
- auto I = Phis.find({LoopReg, InitReg.getValue()});
+ auto I = Phis.find({LoopReg, InitReg.value()});
if (I != Phis.end())
return I->second;
} else {
@@ -1469,10 +1469,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
return R;
// Found a phi taking undef as input, so rewrite it to take InitReg.
MachineInstr *MI = MRI.getVRegDef(R);
- MI->getOperand(1).setReg(InitReg.getValue());
- Phis.insert({{LoopReg, InitReg.getValue()}, R});
+ MI->getOperand(1).setReg(InitReg.value());
+ Phis.insert({{LoopReg, InitReg.value()}, R});
const TargetRegisterClass *ConstrainRegClass =
- MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value()));
assert(ConstrainRegClass && "Expected a valid constrained register class!");
(void)ConstrainRegClass;
UndefPhis.erase(I);
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 1a0f296d5fdc..89a43c4f57f6 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -554,7 +554,7 @@ static void updateLiveness(MachineFunction &MF) {
}
}
-/// Insert restore code for the callee-saved registers used in the function.
+/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
ArrayRef<CalleeSavedInfo> CSI) {
MachineFunction &MF = *SaveBlock.getParent();
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index d57b0ca6d53d..d6a3997e4b70 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -148,9 +148,6 @@ protected:
/// Run or not the local reassignment heuristic. This information is
/// obtained from the TargetSubtargetInfo.
const bool EnableLocalReassign;
-
-private:
- unsigned NextCascade = 1;
};
/// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index c199b6a6cca8..d627519a34aa 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -870,8 +870,8 @@ bool SelectOptimize::computeLoopCosts(
ORE->emit(ORmissL);
return false;
}
- IPredCost += Scaled64::get(ILatency.getValue());
- INonPredCost += Scaled64::get(ILatency.getValue());
+ IPredCost += Scaled64::get(ILatency.value());
+ INonPredCost += Scaled64::get(ILatency.value());
// For a select that can be converted to branch,
// compute its cost as a branch (non-predicated cost).
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa688d9dda3c..2654c00929d8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2392,12 +2392,14 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
// sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
SDLoc DL(N);
- auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
- SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
- if (SDValue NewC =
- DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
- {ConstantOp, DAG.getConstant(1, DL, VT)}))
+ if (SDValue NewC = DAG.FoldConstantArithmetic(
+ IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
+ {ConstantOp, DAG.getConstant(1, DL, VT)})) {
+ SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
+ Not.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
+ }
+
return SDValue();
}
@@ -3760,6 +3762,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // If there's no chance of borrowing from adjacent bits, then sub is xor:
+ // sub C0, X --> xor X, C0
+ if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
+ if (!C0->isOpaque()) {
+ const APInt &C0Val = C0->getAPIntValue();
+ const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
+ if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ }
+ }
+
return SDValue();
}
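The guard compares subtraction and xor only at MaybeOnes, the pattern of every bit X could possibly set given its known-zero bits; if that maximal pattern subtracts without borrowing, every reachable X has its set bits inside C0, so sub C0, X degenerates to xor X, C0. A worked standalone check using only APInt:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    #include <cstdint>

    static void demoSubIsXor() {
      llvm::APInt C0(4, 0b1101);        // the constant minuend
      llvm::APInt MaybeOnes(4, 0b0101); // ~KnownZero(X): bits X may set
      assert((C0 - MaybeOnes) == (C0 ^ MaybeOnes)); // the combine's guard
      for (uint64_t V : {0u, 1u, 4u, 5u}) {         // every X those bits allow
        llvm::APInt X(4, V);
        assert((C0 - X) == (C0 ^ X)); // sub degenerates to xor
      }
    }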
@@ -4550,13 +4563,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
SDLoc DL(N);
// fold (rem c1, c2) -> c1%c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// fold (urem X, -1) -> select(FX == -1, 0, FX)
// Freeze the numerator to avoid a miscompile with an undefined value.
- if (!isSigned && N1C && N1C->isAllOnes()) {
+ if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
SDValue F0 = DAG.getFreeze(N0);
SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
@@ -4581,9 +4593,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
- if (N1.getOpcode() == ISD::SHL &&
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
+ // TODO: We should sink the following into isKnownToBePowerOfTwo
+ // using an OrZero parameter analogous to our handling in ValueTracking.
+ if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
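The and-mask trick relies on P - 1 being an all-low-ones mask whenever P is a power of two; a power of two moved by shl or lshr is still a power of two (or zero, for which urem is undefined anyway), so the same fold now applies to both shift forms. The scalar identity, checked standalone:

    #include <cassert>
    #include <cstdint>

    static void demoUremPow2() {
      for (uint32_t Sh = 0; Sh < 4; ++Sh) {
        uint32_t P = 16u >> Sh; // lshr of a power of two: 16, 8, 4, 2
        for (uint32_t X = 0; X < 64; ++X)
          assert(X % P == (X & (P - 1))); // urem == and with P-1
      }
    }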
@@ -9288,31 +9303,44 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
// sra (add (shl X, N1C), AddC), N1C -->
// sext (add (trunc X to (width - N1C)), AddC')
- if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
- N0.getOperand(0).getOpcode() == ISD::SHL &&
- N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
- if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
- SDValue Shl = N0.getOperand(0);
- // Determine what the truncate's type would be and ask the target if that
- // is a free operation.
- LLVMContext &Ctx = *DAG.getContext();
- unsigned ShiftAmt = N1C->getZExtValue();
- EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
- if (VT.isVector())
- TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+ // sra (sub AddC, (shl X, N1C)), N1C -->
+ // sext (sub AddC', (trunc X to (width - N1C)))
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
+ N0.hasOneUse()) {
+ bool IsAdd = N0.getOpcode() == ISD::ADD;
+ SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
+ if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
+ Shl.hasOneUse()) {
+ // TODO: AddC does not need to be a splat.
+ if (ConstantSDNode *AddC =
+ isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
+ // Determine what the truncate's type would be and ask the target if
+ // that is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
- // TODO: The simple type check probably belongs in the default hook
- // implementation and/or target-specific overrides (because
- // non-simple types likely require masking when legalized), but that
- // restriction may conflict with other transforms.
- if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
- TLI.isTruncateFree(VT, TruncVT)) {
- SDLoc DL(N);
- SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
- SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
- trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
- return DAG.getSExtOrTrunc(Add, DL, VT);
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but
+ // that restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC =
+ DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
+ TruncVT.getScalarSizeInBits()),
+ DL, TruncVT);
+ SDValue Add;
+ if (IsAdd)
+ Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ else
+ Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
}
}
}
@@ -11025,6 +11053,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
return V;
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -13243,18 +13274,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
- // See if we can simplify the input to this truncate through knowledge that
- // only the low bits are being used.
- // For example "trunc (or (shl x, 8), y)" // -> trunc y
- // Currently we only perform this optimization on scalars because vectors
- // may have different active low bits.
- if (!VT.isVector()) {
- APInt Mask =
- APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
- if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
- }
-
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -13341,6 +13360,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+ // For example "trunc (or (shl x, 8), y)" // -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ APInt Mask =
+ APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+ if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
+ }
+
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
@@ -24514,8 +24545,9 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
auto &Size0 = MUC0.NumBytes;
auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 &&
- SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) {
+ Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
+ OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
+ SrcValOffset1 % *Size1 == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f464208cd9dc..6c136bdfc652 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2915,6 +2915,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break;
case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break;
+ case ISD::STACKMAP:
+ Res = SoftPromoteHalfOp_STACKMAP(N, OpNo);
+ break;
}
if (!Res.getNode())
@@ -3042,3 +3045,17 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) {
return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(),
ST->getMemOperand());
}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Op = N->getOperand(OpNo);
+ NewOps[OpNo] = GetSoftPromotedHalf(Op);
+ SDValue NewNode =
+ DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we replaced the node ourselves.
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 69fd83bcd7b3..343722a97c3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -1723,6 +1724,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
+ case ISD::STACKMAP:
+ Res = PromoteIntOp_STACKMAP(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2255,16 +2259,40 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
SDLoc dl(N);
SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
- EVT EltVT = Op.getValueType().getVectorElementType();
- EVT VT = N->getValueType(0);
+ EVT OrigEltVT = N->getOperand(0).getValueType().getVectorElementType();
+ EVT InVT = Op.getValueType();
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ResVT = N->getValueType(0);
+ unsigned Opcode = N->getOpcode();
- if (VT.bitsGE(EltVT))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+ // An i1 vecreduce_xor is equivalent to vecreduce_add; use that instead if
+ // vecreduce_xor is not legal.
+ if (Opcode == ISD::VECREDUCE_XOR && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_XOR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_ADD, InVT))
+ Opcode = ISD::VECREDUCE_ADD;
+
+ // An i1 vecreduce_or is equivalent to vecreduce_umax; use that instead if
+ // vecreduce_or is not legal.
+ else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+ Opcode = ISD::VECREDUCE_UMAX;
+
+ // An i1 vecreduce_and is equivalent to vecreduce_umin; use that instead if
+ // vecreduce_and is not legal.
+ else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+ Opcode = ISD::VECREDUCE_UMIN;
+
+ if (ResVT.bitsGE(EltVT))
+ return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
// Result size must be >= element size. If this is not the case after
// promotion, also promote the result type and then truncate.
- SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+ SDValue Reduce = DAG.getNode(Opcode, dl, EltVT, Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Reduce);
}
SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
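For i1 elements these substitutions are exact identities: xor-reduction computes parity, which is addition mod 2; or-reduction of 0/1 values is their maximum; and-reduction is their minimum. A quick exhaustive check over all 3-element bit vectors:

    #include <algorithm>
    #include <cassert>

    static void demoI1Reductions() {
      for (unsigned Bits = 0; Bits < 8; ++Bits) {
        bool E[3] = {bool(Bits & 1), bool(Bits & 2), bool(Bits & 4)};
        bool Xor = E[0] ^ E[1] ^ E[2];
        unsigned Sum = E[0] + E[1] + E[2];
        assert(Xor == bool(Sum & 1));                                 // xor == add mod 2
        assert((E[0] | E[1] | E[2]) == std::max({E[0], E[1], E[2]})); // or == umax
        assert((E[0] & E[1] & E[2]) == std::min({E[0], E[1], E[2]})); // and == umin
      }
    }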
@@ -2304,6 +2332,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Operand = N->getOperand(OpNo);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
+ NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -4653,6 +4690,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
+ case ISD::STACKMAP:
+ Res = ExpandIntOp_STACKMAP(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -5481,3 +5521,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1);
+
+ SDValue Op = N->getOperand(OpNo);
+ SDLoc DL = SDLoc(N);
+ SmallVector<SDValue> NewOps;
+
+ // Copy operands before the one being expanded.
+ for (unsigned I = 0; I < OpNo; I++)
+ NewOps.push_back(N->getOperand(I));
+
+ if (Op->getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op);
+ EVT Ty = Op.getValueType();
+ if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
+ NewOps.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
+ } else {
+ // FIXME: https://github.com/llvm/llvm-project/issues/55609
+ return SDValue();
+ }
+ } else {
+ // FIXME: Non-constant operands are not yet handled:
+ // - https://github.com/llvm/llvm-project/issues/26431
+ // - https://github.com/llvm/llvm-project/issues/55957
+ return SDValue();
+ }
+
+ // Copy remaining operands.
+ for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++)
+ NewOps.push_back(N->getOperand(I));
+
+ SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we have replaced the node already.
+}
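A stackmap operand wider than the legal integer type cannot simply be split into two register halves, because each operand slot describes one logical value; constants that still fit in 64 bits are therefore re-encoded as the (StackMaps::ConstantOp, immediate) pair the stackmap format already defines, and everything else is deferred to the linked issues. An illustrative, not authoritative, sketch of that encoding step:

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for rewriting one logical stackmap operand:
    // a marker (StackMaps::ConstantOp in the real code) followed by the
    // 64-bit immediate, instead of a reference to a too-wide register.
    static void encodeConstantOperand(std::vector<uint64_t> &Ops,
                                      uint64_t Marker, uint64_t Value) {
      Ops.push_back(Marker); // tells the stackmap parser a literal follows
      Ops.push_back(Value);  // the immediate itself
    }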
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index de320290bda9..2807b7f5ae68 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -402,6 +402,7 @@ private:
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
+ SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -493,6 +494,7 @@ private:
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
@@ -741,6 +743,7 @@ private:
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index fa555be00ded..143abc08eeea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5627,7 +5627,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
- unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
@@ -5639,7 +5638,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// See if a widened result type would be legal, if so widen the node.
// FIXME: This isn't safe for StrictFP. Other optimization here is needed.
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- InVT.getVectorNumElements());
+ InVT.getVectorElementCount());
if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
SDValue Res;
if (N->isStrictFPOpcode()) {
@@ -5665,6 +5664,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT InEltVT = InVT.getVectorElementType();
// Unroll the convert into some scalar code and create a nasty build vector.
+ unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
if (N->isStrictFPOpcode()) {
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
@@ -6055,7 +6055,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// The result type is legal, if its vXi1, keep vXi1 for the new SETCC.
if (VT.getScalarType() == MVT::i1)
SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- SVT.getVectorNumElements());
+ SVT.getVectorElementCount());
SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
SVT, InOp0, InOp1, N->getOperand(2));
@@ -6063,7 +6063,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// Extract the needed results from the result vector.
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
- VT.getVectorNumElements());
+ VT.getVectorElementCount());
SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getVectorIdxConstant(0, dl));
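
The `getVectorNumElements()` to `getVectorElementCount()` switches above let these widenings handle scalable vectors. A minimal sketch of the difference, assuming a type-legalizer context where `DAG` is in scope:

    // ElementCount carries {MinNumElts, Scalable}, so rebuilding a VT keeps
    // the scalable bit: <vscale x 4 x float> -> <vscale x 4 x EltVT>.
    // getVectorNumElements() cannot express this (it asserts on scalable
    // vectors).
    static EVT rebuildVectorVT(SelectionDAG &DAG, EVT InVT, EVT EltVT) {
      return EVT::getVectorVT(*DAG.getContext(), EltVT,
                              InVT.getVectorElementCount());
    }
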
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b3b8756ae9ba..c8d0f5faf647 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -60,7 +60,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
@@ -3271,6 +3270,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
+ case ISD::SETCCCARRY:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
@@ -3506,6 +3506,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::USUBO:
case ISD::SSUBO:
+ case ISD::SUBCARRY:
+ case ISD::SSUBO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
@@ -3520,6 +3522,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert(Op.getResNo() == 0 &&
"We only compute knownbits for the difference here.");
+ // TODO: Compute influence of the carry operand.
+ if (Opcode == ISD::SUBCARRY || Opcode == ISD::SSUBO_CARRY)
+ break;
+
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
@@ -3529,6 +3535,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::UADDO:
case ISD::SADDO:
case ISD::ADDCARRY:
+ case ISD::SADDO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
@@ -3548,7 +3555,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Opcode == ISD::ADDE)
// Can't track carry from glue, set carry to unknown.
Carry.resetAll();
- else if (Opcode == ISD::ADDCARRY)
+ else if (Opcode == ISD::ADDCARRY || Opcode == ISD::SADDO_CARRY)
// TODO: Compute known bits for the carry operand. Not sure if it is worth
// the trouble (how often will we find a known carry bit). And I haven't
// tested this very much yet, but something like this might work:
@@ -3862,6 +3869,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
return true;
+ // vscale(power-of-two) is a power-of-two for some targets
+ if (Val.getOpcode() == ISD::VSCALE &&
+ getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(0)))
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
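
The new VSCALE case rests on simple arithmetic: if the target guarantees vscale is 2^k and the multiplier is 2^m, then vscale * C is 2^(k+m). A hedged sketch of a target opting in; the class name is illustrative, though AArch64 SVE and RISC-V V both constrain vector register width so that vscale is a power of two:

    // Advertise the guarantee that isKnownToBeAPowerOfTwo relies on above.
    bool MyTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
      return true;
    }
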
@@ -4108,8 +4121,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::min(Tmp, Tmp2);
case ISD::SADDO:
case ISD::UADDO:
+ case ISD::SADDO_CARRY:
+ case ISD::ADDCARRY:
case ISD::SSUBO:
case ISD::USUBO:
+ case ISD::SSUBO_CARRY:
+ case ISD::SUBCARRY:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
@@ -4123,6 +4140,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits;
break;
case ISD::SETCC:
+ case ISD::SETCCCARRY:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
@@ -7505,6 +7523,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_UMAX ||
Opcode == ISD::ATOMIC_LOAD_FADD ||
Opcode == ISD::ATOMIC_LOAD_FSUB ||
+ Opcode == ISD::ATOMIC_LOAD_FMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FMIN ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
@@ -10739,19 +10759,19 @@ namespace {
} // end anonymous namespace
-static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
-static ManagedStatic<EVTArray> SimpleVTArray;
-static ManagedStatic<sys::SmartMutex<true>> VTMutex;
-
/// getValueTypeList - Return a pointer to the specified value type.
///
const EVT *SDNode::getValueTypeList(EVT VT) {
+ static std::set<EVT, EVT::compareRawBits> EVTs;
+ static EVTArray SimpleVTArray;
+ static sys::SmartMutex<true> VTMutex;
+
if (VT.isExtended()) {
- sys::SmartScopedLock<true> Lock(*VTMutex);
- return &(*EVTs->insert(VT).first);
+ sys::SmartScopedLock<true> Lock(VTMutex);
+ return &(*EVTs.insert(VT).first);
}
assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!");
- return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ return &SimpleVTArray.VTs[VT.getSimpleVT().SimpleTy];
}
/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
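
The TODO next to the ADDCARRY/SADDO_CARRY handling notes that the carry-in's known bits are dropped. KnownBits already exposes a carry-aware helper, so a sketch of what the TODO might become (not committed code, reusing the locals `Op`, `DemandedElts`, and `Depth` of SelectionDAG::computeKnownBits):

    KnownBits LHS = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    KnownBits RHS = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // Only bit 0 of the boolean carry-in (operand 2) is meaningful.
    KnownBits Carry =
        computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1).trunc(1);
    Known = KnownBits::computeForAddCarry(LHS, RHS, Carry);
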
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 37d05cdba76d..fe3c38ec590d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -703,7 +703,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ *DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
@@ -800,11 +800,11 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
- ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
+ ? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
- ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
+ ? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
@@ -831,10 +831,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
- MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(),
- CallConv.getValue(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT =
+ isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), CallConv.value(), RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -914,10 +914,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
- MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(),
- CallConv.getValue(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT =
+ isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), CallConv.value(), RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -1309,7 +1309,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
/*IsVariadic=*/false)) {
LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
- << DDI.getDI() << "\nBy stripping back to:\n " << V);
+ << *DDI.getDI() << "\nBy stripping back to:\n " << *V);
return;
}
}
@@ -1321,7 +1321,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
- LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI()
<< "\n");
LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
<< "\n");
@@ -3747,13 +3747,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
-void SelectionDAGBuilder::visitInsertValue(const User &I) {
- ArrayRef<unsigned> Indices;
- if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
- Indices = IV->getIndices();
- else
- Indices = cast<ConstantExpr>(&I)->getIndices();
-
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
@@ -4616,6 +4611,8 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
+ case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
+ case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
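
For reference, a hedged sketch of the IR-level construction that reaches these new switch arms; the helper name is illustrative:

    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // atomicrmw fmax lowers through the AtomicRMWInst::FMax ->
    // ISD::ATOMIC_LOAD_FMAX mapping added above.
    static Value *emitAtomicFMax(IRBuilder<> &B, Value *Ptr, Value *FVal) {
      return B.CreateAtomicRMW(AtomicRMWInst::FMax, Ptr, FVal, MaybeAlign(),
                               AtomicOrdering::SequentiallyConsistent);
    }
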
@@ -8410,52 +8407,6 @@ public:
return false;
}
-
- /// getCallOperandValEVT - Return the EVT of the Value* that this operand
- /// corresponds to. If there is no Value* for this operand, it returns
- /// MVT::Other.
- EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL,
- llvm::Type *ParamElemType) const {
- if (!CallOperandVal) return MVT::Other;
-
- if (isa<BasicBlock>(CallOperandVal))
- return TLI.getProgramPointerTy(DL);
-
- llvm::Type *OpTy = CallOperandVal->getType();
-
- // FIXME: code duplicated from TargetLowering::ParseConstraints().
- // If this is an indirect operand, the operand is a pointer to the
- // accessed type.
- if (isIndirect) {
- OpTy = ParamElemType;
- assert(OpTy && "Indirect operand must have elementtype attribute");
- }
-
- // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
- if (StructType *STy = dyn_cast<StructType>(OpTy))
- if (STy->getNumElements() == 1)
- OpTy = STy->getElementType(0);
-
- // If OpTy is not a single value, it may be a struct/union that we
- // can tile with integers.
- if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = DL.getTypeSizeInBits(OpTy);
- switch (BitSize) {
- default: break;
- case 1:
- case 8:
- case 16:
- case 32:
- case 64:
- case 128:
- OpTy = IntegerType::get(Context, BitSize);
- break;
- }
- }
-
- return TLI.getAsmOperandValueType(DL, OpTy, true);
- }
};
@@ -8722,37 +8673,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(Call);
- unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
- unsigned ResNo = 0; // ResNo - The result number of the next output.
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- // Compute the value type for each operand.
- if (OpInfo.hasArg()) {
- OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
+ if (OpInfo.CallOperandVal)
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
- Type *ParamElemTy = Call.getParamElementType(ArgNo);
- EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
- DAG.getDataLayout(), ParamElemTy);
- OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
- ArgNo++;
- } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
- // The return value of the call is this value. As such, there is no
- // corresponding argument.
- assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
- if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
- OpInfo.ConstraintVT = TLI.getSimpleValueType(
- DAG.getDataLayout(), STy->getElementType(ResNo));
- } else {
- assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
- DAG.getDataLayout(), Call.getType()).getSimpleVT();
- }
- ++ResNo;
- } else {
- OpInfo.ConstraintVT = MVT::Other;
- }
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
@@ -8865,7 +8791,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const char *RegName = TRI.getName(RegError.getValue());
+ const char *RegName = TRI.getName(RegError.value());
emitInlineAsmError(Call, "register '" + Twine(RegName) +
"' allocated for constraint '" +
Twine(OpInfo.ConstraintCode) +
@@ -9385,9 +9311,9 @@ static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
}
}
-/// Lower llvm.experimental.stackmap directly to its target opcode.
+/// Lower llvm.experimental.stackmap.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
- // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
@@ -9412,29 +9338,45 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
- // Add the <id> and <numBytes> constants.
- SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
- Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
- SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
- Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
- MVT::i32));
+  // Add the STACKMAP operands, starting with DAG housekeeping.

+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
- // Push live variables for the stack map.
- addStackMapLiveVars(CI, 2, DL, Ops, *this);
+ // Add the <id>, <numShadowBytes> operands.
+ //
+  // These do not require legalisation and can be emitted directly as target
+  // constant nodes.
+ SDValue ID = getValue(CI.getArgOperand(0));
+ assert(ID.getValueType() == MVT::i64);
+ SDValue IDConst = DAG.getTargetConstant(
+ cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType());
+ Ops.push_back(IDConst);
- // We are not pushing any register mask info here on the operands list,
- // because the stackmap doesn't clobber anything.
+ SDValue Shad = getValue(CI.getArgOperand(1));
+ assert(Shad.getValueType() == MVT::i32);
+ SDValue ShadConst = DAG.getTargetConstant(
+ cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType());
+ Ops.push_back(ShadConst);
- // Push the chain and the glue flag.
- Ops.push_back(Chain);
- Ops.push_back(InFlag);
+ // Add the live variables.
+ for (unsigned I = 2; I < CI.arg_size(); I++) {
+ SDValue Op = getValue(CI.getArgOperand(I));
+
+ // Things on the stack are pointer-typed, meaning that they are already
+ // legal and can be emitted directly to target nodes.
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Ops.push_back(DAG.getTargetFrameIndex(
+ FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout())));
+    } else {
+      // Otherwise emit the already-computed target independent node to be
+      // legalised.
+      Ops.push_back(Op);
+    }
+ }
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
- Chain = SDValue(SM, 0);
+ Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
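
The net effect of the rewritten visitStackmap is a target-independent node with a fixed operand layout, summarized below; Select_STACKMAP (further down, in SelectionDAGISel.cpp) later rotates chain and glue back to the end:

    // Operand layout of the ISD::STACKMAP node built above:
    //   Ops[0]   = Chain
    //   Ops[1]   = Glue (from CALLSEQ_START)
    //   Ops[2]   = <id>             (target constant, i64)
    //   Ops[3]   = <numShadowBytes> (target constant, i32)
    //   Ops[4..] = live variables   (TargetFrameIndex, or plain nodes left
    //                                for the type legalizer)
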
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 72cca3d9b001..4a3ab00614b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -529,7 +529,7 @@ private:
void visitShuffleVector(const User &I);
void visitExtractValue(const ExtractValueInst &I);
- void visitInsertValue(const User &I);
+ void visitInsertValue(const InsertValueInst &I);
void visitLandingPad(const LandingPadInst &LP);
void visitGetElementPtr(const User &I);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index bbfc6e5ef64f..9df0b64c26c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -486,6 +486,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+ case ISD::STACKMAP:
+ return "stackmap";
// Vector Predication
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 2b63359c2b1b..7f453f081982 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -51,6 +50,7 @@
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -64,7 +64,6 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
@@ -345,47 +344,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
-/// may trap on it. In this case we have to split the edge so that the path
-/// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction. If available, we pass domtree
-/// and loop info to be updated when we split critical edges. This is because
-/// SelectionDAGISel preserves these analyses.
-/// This is required for correctness, so it must be done at -O0.
-///
-static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
- LoopInfo *LI) {
- // Loop for blocks with phi nodes.
- for (BasicBlock &BB : Fn) {
- PHINode *PN = dyn_cast<PHINode>(BB.begin());
- if (!PN) continue;
-
- ReprocessBlock:
- // For each block with a PHI node, check to see if any of the input values
- // are potentially trapping constant expressions. Constant expressions are
- // the only potentially trapping value that can occur as the argument to a
- // PHI.
- for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Constant *C = dyn_cast<Constant>(PN->getIncomingValue(i));
- if (!C || !C->canTrap()) continue;
-
- // The only case we have to worry about is when the edge is critical.
- // Since this block has a PHI Node, we assume it has multiple input
- // edges: check to see if the pred has multiple successors.
- BasicBlock *Pred = PN->getIncomingBlock(i);
- if (Pred->getTerminator()->getNumSuccessors() == 1)
- continue;
-
- // Okay, we have to split this edge.
- SplitCriticalEdge(
- Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
- CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
- goto ReprocessBlock;
- }
- }
-}
-
static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
MachineModuleInfo &MMI) {
// Only needed for MSVC
@@ -445,10 +403,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
BlockFrequencyInfo *BFI = nullptr;
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
@@ -456,8 +410,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
-
CurDAG->init(*MF, *ORE, this, LibInfo,
getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
FuncInfo->set(Fn, *MF, CurDAG);
@@ -2241,6 +2193,52 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
+ std::vector<SDValue> Ops;
+ auto *It = N->op_begin();
+ SDLoc DL(N);
+
+ // Stash the chain and glue operands so we can move them to the end.
+ SDValue Chain = *It++;
+ SDValue InFlag = *It++;
+
+ // <id> operand.
+ SDValue ID = *It++;
+ assert(ID.getValueType() == MVT::i64);
+ Ops.push_back(ID);
+
+ // <numShadowBytes> operand.
+ SDValue Shad = *It++;
+ assert(Shad.getValueType() == MVT::i32);
+ Ops.push_back(Shad);
+
+ // Live variable operands.
+ for (; It != N->op_end(); It++) {
+ SDNode *OpNode = It->getNode();
+ SDValue O;
+
+ // FrameIndex nodes should have been directly emitted to TargetFrameIndex
+ // nodes at DAG-construction time.
+ assert(OpNode->getOpcode() != ISD::FrameIndex);
+
+ if (OpNode->getOpcode() == ISD::Constant) {
+ Ops.push_back(
+ CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ O = CurDAG->getTargetConstant(
+ cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType());
+ } else {
+ O = *It;
+ }
+ Ops.push_back(O);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops);
+}
+
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2795,6 +2793,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::ARITH_FENCE:
Select_ARITH_FENCE(NodeToMatch);
return;
+ case ISD::STACKMAP:
+ Select_STACKMAP(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
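
After selection, the machine node's operand order differs from the ISD node's: chain and glue move to the back, and surviving constants gain a marker so the stackmap emitter can classify them. A sketch of the result (the location kinds are those used by StackMaps):

    // Machine operand order produced by Select_STACKMAP:
    //   STACKMAP <id>, <numShadowBytes>,
    //            [StackMaps::ConstantOp, imm | TargetFrameIndex | reg]...,
    //            chain, glue
    // The ConstantOp marker tells the StackMaps emitter to record the next
    // operand as a Constant/ConstantIndex location instead of a register.
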
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 19a52fde44c1..3061158eea30 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -531,14 +531,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
for (const Value *V : SI.Bases) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt) {
- assert(Opt.getValue() &&
+ assert(Opt.value() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : SI.Ptrs) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt) {
- assert(Opt.getValue() &&
+ assert(Opt.value() &&
"non gc managed derived pointer found in statepoint");
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a6b471ea22b7..66389a57f780 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1362,6 +1362,29 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
+ // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
+ if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ (Op0.getOperand(0).isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
+ Op0->hasOneUse()) {
+ unsigned NumSubElts =
+ Op0.getOperand(1).getValueType().getVectorNumElements();
+ unsigned SubIdx = Op0.getConstantOperandVal(2);
+ APInt DemandedSub =
+ APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
+ KnownBits KnownSubMask =
+ TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
+ if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
+ SDValue NewAnd =
+ TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
+ SDValue NewInsert =
+ TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
+ Op0.getOperand(1), Op0.getOperand(2));
+ return TLO.CombineTo(Op, NewInsert);
+ }
+ }
+
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
@@ -1371,20 +1394,6 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
- // Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
- SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
- Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
- Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- if (DemandedOp0 || DemandedOp1) {
- Op0 = DemandedOp0 ? DemandedOp0 : Op0;
- Op1 = DemandedOp1 ? DemandedOp1 : Op1;
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
- return TLO.CombineTo(Op, NewOp);
- }
- }
-
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
@@ -1402,6 +1411,20 @@ bool TargetLowering::SimplifyDemandedBits(
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
Known &= Known2;
break;
}
@@ -1418,6 +1441,19 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
@@ -1432,19 +1468,6 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- // If all of the demanded bits are known zero on one side, return the other.
- // These bits cannot contribute to the result of the 'or'.
- if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
- return TLO.CombineTo(Op, Op0);
- if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
- return TLO.CombineTo(Op, Op1);
- // If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
- return true;
- // If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
- return true;
-
Known |= Known2;
break;
}
@@ -1461,20 +1484,6 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
- // Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
- SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
- Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
- Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- if (DemandedOp0 || DemandedOp1) {
- Op0 = DemandedOp0 ? DemandedOp0 : Op0;
- Op1 = DemandedOp1 ? DemandedOp1 : Op1;
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
- return TLO.CombineTo(Op, NewOp);
- }
- }
-
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
@@ -1519,6 +1528,20 @@ bool TargetLowering::SimplifyDemandedBits(
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
Known ^= Known2;
break;
}
@@ -1972,9 +1995,9 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
- return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1);
if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
- return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1);
break;
}
case ISD::UMAX: {
@@ -1985,9 +2008,9 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
- return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1);
if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
- return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
@@ -2486,9 +2509,7 @@ bool TargetLowering::SimplifyDemandedBits(
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
- SDValue NewOp =
- TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
- return TLO.CombineTo(Op, NewOp);
+ Op->setFlags(Flags);
}
return true;
}
@@ -3031,15 +3052,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::VSELECT: {
+ SDValue Sel = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
// Try to transform the select condition based on the current demanded
// elements.
- // TODO: If a condition element is undef, we can choose from one arm of the
- // select (and if one arm is undef, then we can propagate that to the
- // result).
- // TODO - add support for constant vselect masks (see IR version of this).
- APInt UnusedUndef, UnusedZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
- UnusedZero, TLO, Depth + 1))
+ APInt UndefSel, UndefZero;
+ if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
+ Depth + 1))
return true;
// See if we can simplify either vselect operand.
@@ -3047,15 +3068,24 @@ bool TargetLowering::SimplifyDemandedVectorElts(
APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
KnownUndef = UndefLHS & UndefRHS;
KnownZero = ZeroLHS & ZeroRHS;
+
+ // If we know that the selected element is always zero, we don't need the
+ // select value element.
+ APInt DemandedSel = DemandedElts & ~KnownZero;
+ if (DemandedSel != DemandedElts)
+ if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
+ Depth + 1))
+ return true;
+
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -5239,17 +5269,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case 32:
case 64:
case 128:
- OpInfo.ConstraintVT =
- MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ OpTy = IntegerType::get(OpTy->getContext(), BitSize);
break;
}
- } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
- unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
- OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
- } else {
- OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
+ EVT VT = getAsmOperandValueType(DL, OpTy, true);
+ OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
ArgNo++;
}
}
@@ -7833,7 +7859,7 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
// return popcount(~x);
//
// Ref: "Hacker's Delight" by Henry Warren
- for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
+ for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
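
The new loop bound also covers non-power-of-two element widths: for a 24-bit element, the old bound `(1 << i) <= 12` stopped after the shift by 8, leaving low bits unsmeared, while `(1 << i) < 24` includes the shift by 16. A standalone sketch of the expansion for 32 bits:

    #include <cstdint>

    // ctlz(x) == popcount(~smear(x)): OR-ing in right-shifts by 1,2,4,8,16
    // sets every bit at or below the leading one, so the remaining zeros in
    // ~x are exactly the leading zeros. (Ref: Hacker's Delight.)
    unsigned ctlz32(uint32_t X) {
      for (unsigned Shift = 1; Shift < 32; Shift <<= 1)
        X |= X >> Shift;
      return __builtin_popcount(~X); // GCC/Clang builtin; C++20: std::popcount
    }
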
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f3d68bd9c92d..2badbe34ae6a 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -449,9 +449,6 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
- if (Name == ".llvm.offloading")
- return SectionKind::getExclude();
-
if (Name.empty() || Name[0] != '.') return K;
// Default implementation based on some magic section names.
@@ -501,6 +498,9 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
if (hasPrefix(Name, ".preinit_array"))
return ELF::SHT_PREINIT_ARRAY;
+ if (hasPrefix(Name, ".llvm.offloading"))
+ return ELF::SHT_LLVM_OFFLOADING;
+
if (K.isBSS() || K.isThreadBSS())
return ELF::SHT_NOBITS;
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 50c52190c1f6..298359dea9af 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -1005,6 +1005,7 @@ void DWARFLinker::DIECloner::cloneExpression(
// instead indicate the generic type. The same holds for
// DW_OP_reinterpret, which is currently not supported.
if (RefOffset > 0 || Op.getCode() != dwarf::DW_OP_convert) {
+ RefOffset += Unit.getOrigUnit().getOffset();
auto RefDie = Unit.getOrigUnit().getDIEForOffset(RefOffset);
CompileUnit::DIEInfo &Info = Unit.getInfo(RefDie);
if (DIE *Clone = Info.Clone)
diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
index d12f6c796e50..74803a3e495a 100644
--- a/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
+++ b/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
@@ -8,7 +8,6 @@
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include <string>
using namespace llvm;
@@ -42,9 +41,9 @@ public:
};
} // namespace
-static llvm::ManagedStatic<CodeViewErrorCategory> CodeViewErrCategory;
const std::error_category &llvm::codeview::CVErrorCategory() {
- return *CodeViewErrCategory;
+ static CodeViewErrorCategory CodeViewErrCategory;
+ return CodeViewErrCategory;
}
char CodeViewError::ID;
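
This is the first of several identical conversions in this commit (the MSF, DIA, PDB, and Raw error categories below follow suit): a ManagedStatic replaced by a Meyers singleton, whose initialization C++11 guarantees to be thread-safe and on-first-use, with no llvm_shutdown ordering to worry about. The general shape, with an illustrative stand-in category:

    #include <string>
    #include <system_error>

    namespace {
    struct MyErrorCategory : std::error_category { // illustrative stand-in
      const char *name() const noexcept override { return "my.category"; }
      std::string message(int) const override { return "error"; }
    };
    } // namespace

    const std::error_category &myErrCategory() {
      static MyErrorCategory Category; // constructed once, on first call
      return Category;
    }
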
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index c785026f8461..2e567d8bc7ee 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1205,13 +1205,13 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram,
if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) {
if (const auto *LT = CU->getContext().getLineTableForUnit(CU))
LT->getFileNameByIndex(
- DeclFileAttr->getAsUnsignedConstant().getValue(),
+ DeclFileAttr->getAsUnsignedConstant().value(),
CU->getCompilationDir(),
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
Local.DeclFile);
}
if (auto DeclLineAttr = Die.find(DW_AT_decl_line))
- Local.DeclLine = DeclLineAttr->getAsUnsignedConstant().getValue();
+ Local.DeclLine = DeclLineAttr->getAsUnsignedConstant().value();
Result.push_back(Local);
return;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 2e0780e249aa..33856c12b3c9 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -327,20 +327,20 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
FileEntry.Source = Value;
break;
case DW_LNCT_directory_index:
- FileEntry.DirIdx = Value.getAsUnsignedConstant().getValue();
+ FileEntry.DirIdx = Value.getAsUnsignedConstant().value();
break;
case DW_LNCT_timestamp:
- FileEntry.ModTime = Value.getAsUnsignedConstant().getValue();
+ FileEntry.ModTime = Value.getAsUnsignedConstant().value();
break;
case DW_LNCT_size:
- FileEntry.Length = Value.getAsUnsignedConstant().getValue();
+ FileEntry.Length = Value.getAsUnsignedConstant().value();
break;
case DW_LNCT_MD5:
- if (!Value.getAsBlock() || Value.getAsBlock().getValue().size() != 16)
+ if (!Value.getAsBlock() || Value.getAsBlock().value().size() != 16)
return createStringError(
errc::invalid_argument,
"failed to parse file entry because the MD5 hash is invalid");
- std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16,
+ std::uninitialized_copy_n(Value.getAsBlock().value().begin(), 16,
FileEntry.Checksum.begin());
break;
default:
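
These are mechanical renames: llvm::Optional adopted std::optional's spellings ahead of the eventual migration. Illustrative only:

    #include "llvm/ADT/Optional.h"
    #include <cstdint>

    uint64_t demo(llvm::Optional<uint64_t> O) {
      // Before: O.hasValue() / O.getValue(). The new spellings mirror
      // std::optional:
      return O.has_value() ? O.value() : 0;
    }
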
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 96c546250974..15a2d23c4fd2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -136,23 +136,30 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
auto Color = HighlightColor::Enumerator;
if (Attr == DW_AT_decl_file || Attr == DW_AT_call_file) {
Color = HighlightColor::String;
- if (const auto *LT = U->getContext().getLineTableForUnit(U))
- if (LT->getFileNameByIndex(
- *FormValue.getAsUnsignedConstant(), U->getCompilationDir(),
- DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) {
- File = '"' + File + '"';
- Name = File;
+ if (const auto *LT = U->getContext().getLineTableForUnit(U)) {
+ if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant()) {
+ if (LT->getFileNameByIndex(
+ *Val, U->getCompilationDir(),
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+ File)) {
+ File = '"' + File + '"';
+ Name = File;
+ }
}
+ }
} else if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
Name = AttributeValueString(Attr, *Val);
if (!Name.empty())
WithColor(OS, Color) << Name;
- else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line)
- OS << *FormValue.getAsUnsignedConstant();
- else if (Attr == DW_AT_low_pc &&
- (FormValue.getAsAddress() ==
- dwarf::computeTombstoneAddress(U->getAddressByteSize()))) {
+ else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line) {
+ if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
+ OS << *Val;
+ else
+ FormValue.dump(OS, DumpOpts);
+ } else if (Attr == DW_AT_low_pc &&
+ (FormValue.getAsAddress() ==
+ dwarf::computeTombstoneAddress(U->getAddressByteSize()))) {
if (DumpOpts.Verbose) {
FormValue.dump(OS, DumpOpts);
OS << " (";
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index c704f8f583af..2be2a12aa025 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -704,6 +704,14 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
}
break;
}
+ case DW_AT_call_line:
+ case DW_AT_decl_line: {
+ if (!AttrValue.Value.getAsUnsignedConstant()) {
+ ReportError("DIE has " + AttributeString(Attr) +
+ " with invalid encoding");
+ }
+ break;
+ }
default:
break;
}
diff --git a/llvm/lib/DebugInfo/MSF/MSFError.cpp b/llvm/lib/DebugInfo/MSF/MSFError.cpp
index 9df2158423a4..fd93c3e726cc 100644
--- a/llvm/lib/DebugInfo/MSF/MSFError.cpp
+++ b/llvm/lib/DebugInfo/MSF/MSFError.cpp
@@ -8,7 +8,6 @@
#include "llvm/DebugInfo/MSF/MSFError.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include <string>
using namespace llvm;
@@ -50,7 +49,9 @@ public:
};
} // namespace
-static llvm::ManagedStatic<MSFErrorCategory> MSFCategory;
-const std::error_category &llvm::msf::MSFErrCategory() { return *MSFCategory; }
+const std::error_category &llvm::msf::MSFErrCategory() {
+ static MSFErrorCategory MSFCategory;
+ return MSFCategory;
+}
char MSFError::ID;
diff --git a/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp b/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
index 819651f77787..0bd93a0e9506 100644
--- a/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
+++ b/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
@@ -1,6 +1,5 @@
#include "llvm/DebugInfo/PDB/DIA/DIAError.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -31,7 +30,9 @@ public:
}
};
-static llvm::ManagedStatic<DIAErrorCategory> DIACategory;
-const std::error_category &llvm::pdb::DIAErrCategory() { return *DIACategory; }
+const std::error_category &llvm::pdb::DIAErrCategory() {
+ static DIAErrorCategory DIACategory;
+ return DIACategory;
+}
char DIAError::ID;
diff --git a/llvm/lib/DebugInfo/PDB/GenericError.cpp b/llvm/lib/DebugInfo/PDB/GenericError.cpp
index 0e4cba3174b2..d6da2dd62140 100644
--- a/llvm/lib/DebugInfo/PDB/GenericError.cpp
+++ b/llvm/lib/DebugInfo/PDB/GenericError.cpp
@@ -8,7 +8,6 @@
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -42,7 +41,9 @@ public:
};
} // namespace
-static llvm::ManagedStatic<PDBErrorCategory> PDBCategory;
-const std::error_category &llvm::pdb::PDBErrCategory() { return *PDBCategory; }
+const std::error_category &llvm::pdb::PDBErrCategory() {
+ static PDBErrorCategory PDBCategory;
+ return PDBCategory;
+}
char PDBError::ID;
diff --git a/llvm/lib/DebugInfo/PDB/Native/RawError.cpp b/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
index ed6cf0839675..31320288a603 100644
--- a/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
@@ -1,6 +1,5 @@
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -47,7 +46,9 @@ public:
};
} // namespace
-static llvm::ManagedStatic<RawErrorCategory> RawCategory;
-const std::error_category &llvm::pdb::RawErrCategory() { return *RawCategory; }
+const std::error_category &llvm::pdb::RawErrCategory() {
+ static RawErrorCategory RawCategory;
+ return RawCategory;
+}
char RawError::ID;
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index d2ff8aa7c995..c239d4c260ec 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -327,6 +327,8 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> &Obj) {
return {};
}
+} // end anonymous namespace
+
Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
Optional<ArrayRef<uint8_t>> BuildID;
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
@@ -342,8 +344,6 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
return BuildID;
}
-} // end anonymous namespace
-
ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
const MachOObjectFile *MachExeObj,
const std::string &ArchName) {
diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index 7b1c36fdbe09..ef4e11ca38e6 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -8,25 +8,39 @@
///
/// \file
///
-/// This file defines the fetchInfo function, which retrieves
-/// any of the three supported artifact types: (executable, debuginfo, source
-/// file) associated with a build-id from debuginfod servers. If a source file
-/// is to be fetched, its absolute path must be specified in the Description
-/// argument to fetchInfo.
+/// This file contains several definitions for the debuginfod client and server.
+/// For the client, this file defines the fetchInfo function. For the server,
+/// this file defines the DebuginfodLogEntry and DebuginfodServer structs, as
+/// well as the DebuginfodLog, DebuginfodCollection classes. The fetchInfo
+/// function retrieves any of the three supported artifact types: (executable,
+/// debuginfo, source file) associated with a build-id from debuginfod servers.
+/// If a source file is to be fetched, its absolute path must be specified in
+/// the Description argument to fetchInfo. The DebuginfodLogEntry,
+/// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to
+/// scan the local filesystem for binaries and serve the debuginfod protocol.
///
//===----------------------------------------------------------------------===//
#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/xxhash.h"
+#include <atomic>
+
namespace llvm {
static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
@@ -46,6 +60,8 @@ Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() {
return DebuginfodUrls;
}
+/// Finds a default local file caching directory for the debuginfod client,
+/// first checking DEBUGINFOD_CACHE_PATH.
Expected<std::string> getDefaultDebuginfodCacheDirectory() {
if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
return CacheDirectoryEnv;
@@ -208,4 +224,293 @@ Expected<std::string> getCachedOrDownloadArtifact(
return createStringError(errc::argument_out_of_domain, "build id not found");
}
+
+DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
+ : Message(Message.str()) {}
+
+void DebuginfodLog::push(const Twine &Message) {
+ push(DebuginfodLogEntry(Message));
+}
+
+void DebuginfodLog::push(DebuginfodLogEntry Entry) {
+ {
+ std::lock_guard<std::mutex> Guard(QueueMutex);
+ LogEntryQueue.push(Entry);
+ }
+ QueueCondition.notify_one();
+}
+
+DebuginfodLogEntry DebuginfodLog::pop() {
+  // Hold the lock across both the wait and the pop; releasing it in between
+  // would let another consumer drain the queue first.
+  std::unique_lock<std::mutex> Guard(QueueMutex);
+  // Wait for messages to be pushed into the queue.
+  QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
+  assert(!LogEntryQueue.empty() && "Expected message in the queue.");
+
+  DebuginfodLogEntry Entry = LogEntryQueue.front();
+  LogEntryQueue.pop();
+  return Entry;
+}
+
+DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
+ DebuginfodLog &Log, ThreadPool &Pool,
+ double MinInterval)
+ : Log(Log), Pool(Pool), MinInterval(MinInterval) {
+ for (StringRef Path : PathsRef)
+ Paths.push_back(Path.str());
+}
+
+Error DebuginfodCollection::update() {
+ std::lock_guard<sys::Mutex> Guard(UpdateMutex);
+ if (UpdateTimer.isRunning())
+ UpdateTimer.stopTimer();
+ UpdateTimer.clear();
+ for (const std::string &Path : Paths) {
+ Log.push("Updating binaries at path " + Path);
+ if (Error Err = findBinaries(Path))
+ return Err;
+ }
+ Log.push("Updated collection");
+ UpdateTimer.startTimer();
+ return Error::success();
+}
+
+Expected<bool> DebuginfodCollection::updateIfStale() {
+ if (!UpdateTimer.isRunning())
+ return false;
+ UpdateTimer.stopTimer();
+ double Time = UpdateTimer.getTotalTime().getWallTime();
+ UpdateTimer.startTimer();
+ if (Time < MinInterval)
+ return false;
+ if (Error Err = update())
+ return std::move(Err);
+ return true;
+}
+
+Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
+ while (true) {
+ if (Error Err = update())
+ return Err;
+ std::this_thread::sleep_for(Interval);
+ }
+ llvm_unreachable("updateForever loop should never end");
+}
+
+static bool isDebugBinary(object::ObjectFile *Object) {
+ // TODO: handle PDB debuginfo
+ std::unique_ptr<DWARFContext> Context = DWARFContext::create(
+ *Object, DWARFContext::ProcessDebugRelocations::Process);
+ const DWARFObject &DObj = Context->getDWARFObj();
+ unsigned NumSections = 0;
+ DObj.forEachInfoSections([&](const DWARFSection &S) { NumSections++; });
+ return NumSections;
+}
+
+static bool hasELFMagic(StringRef FilePath) {
+ file_magic Type;
+ std::error_code EC = identify_magic(FilePath, Type);
+ if (EC)
+ return false;
+ switch (Type) {
+ case file_magic::elf:
+ case file_magic::elf_relocatable:
+ case file_magic::elf_executable:
+ case file_magic::elf_shared_object:
+ case file_magic::elf_core:
+ return true;
+ default:
+ return false;
+ }
+}
+
+Error DebuginfodCollection::findBinaries(StringRef Path) {
+ std::error_code EC;
+ sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
+ std::mutex IteratorMutex;
+ ThreadPoolTaskGroup IteratorGroup(Pool);
+ for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
+ WorkerIndex++) {
+ IteratorGroup.async([&, this]() -> void {
+ std::string FilePath;
+ while (true) {
+ {
+ // Check if iteration is over or there is an error during iteration
+ std::lock_guard<std::mutex> Guard(IteratorMutex);
+ if (I == E || EC)
+ return;
+ // Grab a file path from the directory iterator and advance the
+ // iterator.
+ FilePath = I->path();
+ I.increment(EC);
+ }
+
+ // Inspect the file at this path to determine if it is debuginfo.
+ if (!hasELFMagic(FilePath))
+ continue;
+
+ Expected<object::OwningBinary<object::Binary>> BinOrErr =
+ object::createBinary(FilePath);
+
+ if (!BinOrErr) {
+ consumeError(BinOrErr.takeError());
+ continue;
+ }
+      object::Binary *Bin = BinOrErr.get().getBinary();
+ if (!Bin->isObject())
+ continue;
+
+ // TODO: Support non-ELF binaries
+ object::ELFObjectFileBase *Object =
+ dyn_cast<object::ELFObjectFileBase>(Bin);
+ if (!Object)
+ continue;
+
+ Optional<BuildIDRef> ID = symbolize::getBuildID(Object);
+ if (!ID)
+ continue;
+
+ std::string IDString = buildIDToString(ID.value());
+ if (isDebugBinary(Object)) {
+ std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
+ DebugBinaries[IDString] = FilePath;
+ } else {
+ std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
+ Binaries[IDString] = FilePath;
+ }
+ }
+ });
+ }
+ IteratorGroup.wait();
+ std::unique_lock<std::mutex> Guard(IteratorMutex);
+ if (EC)
+ return errorCodeToError(EC);
+ return Error::success();
+}
+
+Expected<Optional<std::string>>
+DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
+ Log.push("getting binary path of ID " + buildIDToString(ID));
+ std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
+ auto Loc = Binaries.find(buildIDToString(ID));
+ if (Loc != Binaries.end()) {
+ std::string Path = Loc->getValue();
+ return Path;
+ }
+ return None;
+}
+
+Expected<Optional<std::string>>
+DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
+ Log.push("getting debug binary path of ID " + buildIDToString(ID));
+ std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
+ auto Loc = DebugBinaries.find(buildIDToString(ID));
+ if (Loc != DebugBinaries.end()) {
+ std::string Path = Loc->getValue();
+ return Path;
+ }
+ return None;
+}
+
+Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
+ {
+ // Check collection; perform on-demand update if stale.
+ Expected<Optional<std::string>> PathOrErr = getBinaryPath(ID);
+ if (!PathOrErr)
+ return PathOrErr.takeError();
+ Optional<std::string> Path = *PathOrErr;
+ if (!Path) {
+ Expected<bool> UpdatedOrErr = updateIfStale();
+ if (!UpdatedOrErr)
+ return UpdatedOrErr.takeError();
+ if (*UpdatedOrErr) {
+ // Try once more.
+ PathOrErr = getBinaryPath(ID);
+ if (!PathOrErr)
+ return PathOrErr.takeError();
+ Path = *PathOrErr;
+ }
+ }
+ if (Path)
+ return Path.value();
+ }
+
+ // Try federation.
+ Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
+ if (!PathOrErr)
+ consumeError(PathOrErr.takeError());
+
+ // Fall back to debug binary.
+ return findDebugBinaryPath(ID);
+}
+
+Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
+ // Check collection; perform on-demand update if stale.
+ Expected<Optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
+ if (!PathOrErr)
+ return PathOrErr.takeError();
+ Optional<std::string> Path = *PathOrErr;
+ if (!Path) {
+ Expected<bool> UpdatedOrErr = updateIfStale();
+ if (!UpdatedOrErr)
+ return UpdatedOrErr.takeError();
+ if (*UpdatedOrErr) {
+ // Try once more.
+      PathOrErr = getDebugBinaryPath(ID);
+ if (!PathOrErr)
+ return PathOrErr.takeError();
+ Path = *PathOrErr;
+ }
+ }
+ if (Path)
+ return Path.value();
+
+ // Try federation.
+ return getCachedOrDownloadDebuginfo(ID);
+}
+
+DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
+ DebuginfodCollection &Collection)
+ : Log(Log), Collection(Collection) {
+ cantFail(
+ Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
+ Log.push("GET " + Request.UrlPath);
+ std::string IDString;
+ if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
+ Request.setResponse(
+ {404, "text/plain", "Build ID is not a hex string\n"});
+ return;
+ }
+ BuildID ID(IDString.begin(), IDString.end());
+ Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
+ if (Error Err = PathOrErr.takeError()) {
+ consumeError(std::move(Err));
+ Request.setResponse({404, "text/plain", "Build ID not found\n"});
+ return;
+ }
+ streamFile(Request, *PathOrErr);
+ }));
+ cantFail(
+ Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
+ Log.push("GET " + Request.UrlPath);
+ std::string IDString;
+ if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
+ Request.setResponse(
+ {404, "text/plain", "Build ID is not a hex string\n"});
+ return;
+ }
+ BuildID ID(IDString.begin(), IDString.end());
+ Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
+ if (Error Err = PathOrErr.takeError()) {
+ consumeError(std::move(Err));
+ Request.setResponse({404, "text/plain", "Build ID not found\n"});
+ return;
+ }
+ streamFile(Request, *PathOrErr);
+ }));
+}
+
} // namespace llvm
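
Taken together, the new classes compose into a small server. A rough sketch under stated assumptions: the `bind`/`listen` surface of HTTPServer is assumed from how the llvm-debuginfod tool uses it (check llvm/Debuginfod/HTTPServer.h for the exact signatures), and the function name, port, and scan path are illustrative:

    #include "llvm/Debuginfod/Debuginfod.h"
    #include "llvm/Debuginfod/HTTPServer.h"
    #include "llvm/Support/ThreadPool.h"
    #include <chrono>

    using namespace llvm;

    int runDebuginfodServer() {
      ThreadPool Pool;
      DebuginfodLog Log;
      StringRef Paths[] = {"/usr/lib/debug"};
      DebuginfodCollection Collection(Paths, Log, Pool, /*MinInterval=*/300.0);
      DebuginfodServer Server(Log, Collection);

      // Assumed API: bind to a port, then serve until interrupted.
      if (Error Err = Server.Server.bind(8080)) {
        consumeError(std::move(Err));
        return 1;
      }
      // Rescan the filesystem in the background while requests are served.
      Pool.async([&] {
        consumeError(Collection.updateForever(std::chrono::minutes(5)));
      });
      if (Error Err = Server.Server.listen()) {
        consumeError(std::move(Err));
        return 1;
      }
      return 0;
    }
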
diff --git a/llvm/lib/Debuginfod/HTTPServer.cpp b/llvm/lib/Debuginfod/HTTPServer.cpp
new file mode 100644
index 000000000000..2ea923d5a734
--- /dev/null
+++ b/llvm/lib/Debuginfod/HTTPServer.cpp
@@ -0,0 +1,189 @@
+//===-- llvm/Debuginfod/HTTPServer.cpp - HTTP server library -----*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// This file defines the methods of the HTTPServer class and the streamFile
+/// function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debuginfod/HTTPServer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+
+#ifdef LLVM_ENABLE_HTTPLIB
+#include "httplib.h"
+#endif
+
+using namespace llvm;
+
+bool llvm::streamFile(HTTPServerRequest &Request, StringRef FilePath) {
+ Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(FilePath);
+ if (Error Err = FDOrErr.takeError()) {
+ consumeError(std::move(Err));
+ Request.setResponse({404u, "text/plain", "Could not open file to read.\n"});
+ return false;
+ }
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getOpenFile(*FDOrErr, FilePath,
+ /*FileSize=*/-1,
+ /*RequiresNullTerminator=*/false);
+ sys::fs::closeFile(*FDOrErr);
+ if (Error Err = errorCodeToError(MBOrErr.getError())) {
+ consumeError(std::move(Err));
+ Request.setResponse({404u, "text/plain", "Could not memory-map file.\n"});
+ return false;
+ }
+  // Lambdas are copied on conversion to std::function, preventing the use of
+  // smart pointers.
+ MemoryBuffer *MB = MBOrErr->release();
+ Request.setResponse({200u, "application/octet-stream", MB->getBufferSize(),
+ [=](size_t Offset, size_t Length) -> StringRef {
+ return MB->getBuffer().substr(Offset, Length);
+ },
+ [=](bool Success) { delete MB; }});
+ return true;
+}
+
+#ifdef LLVM_ENABLE_HTTPLIB
+
+bool HTTPServer::isAvailable() { return true; }
+
+HTTPServer::HTTPServer() { Server = std::make_unique<httplib::Server>(); }
+
+HTTPServer::~HTTPServer() { stop(); }
+
+static void expandUrlPathMatches(const std::smatch &Matches,
+ HTTPServerRequest &Request) {
+ bool UrlPathSet = false;
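+  // The first element of a std::smatch is the full match (the URL path); the
+  // remaining elements are the capture groups.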
+ for (const auto &it : Matches) {
+ if (UrlPathSet)
+ Request.UrlPathMatches.push_back(it);
+ else {
+ Request.UrlPath = it;
+ UrlPathSet = true;
+ }
+ }
+}
+
+HTTPServerRequest::HTTPServerRequest(const httplib::Request &HTTPLibRequest,
+ httplib::Response &HTTPLibResponse)
+ : HTTPLibResponse(HTTPLibResponse) {
+ expandUrlPathMatches(HTTPLibRequest.matches, *this);
+}
+
+void HTTPServerRequest::setResponse(HTTPResponse Response) {
+ HTTPLibResponse.set_content(Response.Body.begin(), Response.Body.size(),
+ Response.ContentType);
+ HTTPLibResponse.status = Response.Code;
+}
+
+void HTTPServerRequest::setResponse(StreamingHTTPResponse Response) {
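+  // Serve the body in chunks: cpp-httplib invokes the provider repeatedly
+  // with an offset/length window until ContentLength bytes have been written.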
+ HTTPLibResponse.set_content_provider(
+ Response.ContentLength, Response.ContentType,
+ [=](size_t Offset, size_t Length, httplib::DataSink &Sink) {
+ if (Offset < Response.ContentLength) {
+ StringRef Chunk = Response.Provider(Offset, Length);
+ Sink.write(Chunk.begin(), Chunk.size());
+ }
+ return true;
+ },
+ [=](bool Success) { Response.CompletionHandler(Success); });
+
+ HTTPLibResponse.status = Response.Code;
+}
+
+Error HTTPServer::get(StringRef UrlPathPattern, HTTPRequestHandler Handler) {
+ std::string ErrorMessage;
+ if (!Regex(UrlPathPattern).isValid(ErrorMessage))
+ return createStringError(errc::argument_out_of_domain, ErrorMessage);
+ Server->Get(std::string(UrlPathPattern),
+ [Handler](const httplib::Request &HTTPLibRequest,
+ httplib::Response &HTTPLibResponse) {
+ HTTPServerRequest Request(HTTPLibRequest, HTTPLibResponse);
+ Handler(Request);
+ });
+ return Error::success();
+}
+
+Error HTTPServer::bind(unsigned ListenPort, const char *HostInterface) {
+ if (!Server->bind_to_port(HostInterface, ListenPort))
+ return createStringError(errc::io_error,
+ "Could not assign requested address.");
+ Port = ListenPort;
+ return Error::success();
+}
+
+Expected<unsigned> HTTPServer::bind(const char *HostInterface) {
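+  // Let cpp-httplib pick any free port, then remember it for listen().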
+ int ListenPort = Server->bind_to_any_port(HostInterface);
+ if (ListenPort < 0)
+ return createStringError(errc::io_error,
+ "Could not assign any port on requested address.");
+ return Port = ListenPort;
+}
+
+Error HTTPServer::listen() {
+ if (!Port)
+ return createStringError(errc::io_error,
+ "Cannot listen without first binding to a port.");
+ if (!Server->listen_after_bind())
+ return createStringError(
+ errc::io_error,
+ "An unknown error occurred when cpp-httplib attempted to listen.");
+ return Error::success();
+}
+
+void HTTPServer::stop() {
+ Server->stop();
+ Port = 0;
+}
+
+#else
+
+// TODO: Implement a barebones standalone HTTP server.
+bool HTTPServer::isAvailable() { return false; }
+
+HTTPServer::HTTPServer() = default;
+
+HTTPServer::~HTTPServer() = default;
+
+void HTTPServerRequest::setResponse(HTTPResponse Response) {
+ llvm_unreachable("No HTTP server implementation available");
+}
+
+void HTTPServerRequest::setResponse(StreamingHTTPResponse Response) {
+ llvm_unreachable("No HTTP server implementation available");
+}
+
+Error HTTPServer::get(StringRef UrlPathPattern, HTTPRequestHandler Handler) {
+ llvm_unreachable("No HTTP server implementation available");
+}
+
+Error HTTPServer::bind(unsigned ListenPort, const char *HostInterface) {
+ llvm_unreachable("No HTTP server implementation available");
+}
+
+Expected<unsigned> HTTPServer::bind(const char *HostInterface) {
+ llvm_unreachable("No HTTP server implementation available");
+}
+
+Error HTTPServer::listen() {
+ llvm_unreachable("No HTTP server implementation available");
+}
+
+void HTTPServer::stop() {
+ llvm_unreachable("No HTTP server implementation available");
+}
+
+#endif // LLVM_ENABLE_HTTPLIB
diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 29a623ebe449..f1eeee3b3599 100644
--- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -12,7 +12,6 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include <mutex>
@@ -91,11 +90,18 @@ typedef llvm::DenseMap<JITEventListener::ObjectKey, RegisteredObjectInfo>
/// object files that are in executable memory managed by the client of this
/// class.
class GDBJITRegistrationListener : public JITEventListener {
+ /// Lock used to serialize all jit registration events, since they
+ /// modify global variables.
+ ///
+ /// Only a single instance of GDBJITRegistrationListener is ever created,
+ /// and so the lock can be a member variable of that instance. This ensures
+ /// destructors are run in the correct order.
+ sys::Mutex JITDebugLock;
+
/// A map of in-memory object files that have been registered with the
/// JIT interface.
RegisteredObjectBufferMap ObjectBufferMap;
-public:
/// Instantiates the JIT service.
GDBJITRegistrationListener() = default;
@@ -103,6 +109,12 @@ public:
/// internal resources.
~GDBJITRegistrationListener() override;
+public:
+ static GDBJITRegistrationListener &instance() {
+ static GDBJITRegistrationListener Instance;
+ return Instance;
+ }
+
/// Creates an entry in the JIT registry for the buffer @p Object,
/// which must contain an object file in executable memory with any
/// debug information for the debugger.
@@ -121,10 +133,6 @@ private:
void deregisterObjectInternal(RegisteredObjectBufferMap::iterator I);
};
-/// Lock used to serialize all jit registration events, since they
-/// modify global variables.
-ManagedStatic<sys::Mutex> JITDebugLock;
-
/// Do the registration.
void NotifyDebugger(jit_code_entry* JITCodeEntry) {
__jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
@@ -143,7 +151,7 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) {
GDBJITRegistrationListener::~GDBJITRegistrationListener() {
// Free all registered object files.
- std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock);
+ std::lock_guard<llvm::sys::Mutex> locked(JITDebugLock);
for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(),
E = ObjectBufferMap.end();
I != E; ++I) {
@@ -167,7 +175,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded(
const char *Buffer = DebugObj.getBinary()->getMemoryBufferRef().getBufferStart();
size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize();
- std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock);
+ std::lock_guard<llvm::sys::Mutex> locked(JITDebugLock);
assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() &&
"Second attempt to perform debug registration.");
jit_code_entry* JITCodeEntry = new jit_code_entry();
@@ -186,7 +194,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded(
}
void GDBJITRegistrationListener::notifyFreeingObject(ObjectKey K) {
- std::lock_guard<llvm::sys::Mutex> locked(*JITDebugLock);
+ std::lock_guard<llvm::sys::Mutex> locked(JITDebugLock);
RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(K);
if (I != ObjectBufferMap.end()) {
@@ -228,14 +236,12 @@ void GDBJITRegistrationListener::deregisterObjectInternal(
JITCodeEntry = nullptr;
}
-llvm::ManagedStatic<GDBJITRegistrationListener> GDBRegListener;
-
} // end namespace
namespace llvm {
JITEventListener* JITEventListener::createGDBRegistrationListener() {
- return &*GDBRegListener;
+ return &GDBJITRegistrationListener::instance();
}
} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFF.cpp b/llvm/lib/ExecutionEngine/JITLink/COFF.cpp
new file mode 100644
index 000000000000..fddc9b813fb2
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/COFF.cpp
@@ -0,0 +1,137 @@
+//===-------------- COFF.cpp - JIT linker function for COFF -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF jit-link function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/COFF.h"
+
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/ExecutionEngine/JITLink/COFF_x86_64.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstring>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+static StringRef getMachineName(uint16_t Machine) {
+ switch (Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return "i386";
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return "x86_64";
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ return "ARM";
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ return "ARM64";
+ default:
+ return "unknown";
+ }
+}
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromCOFFObject(MemoryBufferRef ObjectBuffer) {
+ StringRef Data = ObjectBuffer.getBuffer();
+
+ // Check magic
+ auto Magic = identify_magic(ObjectBuffer.getBuffer());
+ if (Magic != file_magic::coff_object)
+ return make_error<JITLinkError>("Invalid COFF buffer");
+
+ if (Data.size() < sizeof(object::coff_file_header))
+ return make_error<JITLinkError>("Truncated COFF buffer");
+
+ uint64_t CurPtr = 0;
+ bool IsPE = false;
+
+ // Check if this is a PE/COFF file.
+ if (Data.size() >= sizeof(object::dos_header) + sizeof(COFF::PEMagic)) {
+ const auto *DH =
+ reinterpret_cast<const object::dos_header *>(Data.data() + CurPtr);
+ if (DH->Magic[0] == 'M' && DH->Magic[1] == 'Z') {
+ // Check the PE magic bytes. ("PE\0\0")
+ CurPtr = DH->AddressOfNewExeHeader;
+ if (memcmp(Data.data() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) !=
+ 0) {
+ return make_error<JITLinkError>("Incorrect PE magic");
+ }
+ CurPtr += sizeof(COFF::PEMagic);
+ IsPE = true;
+ }
+ }
+ if (Data.size() < CurPtr + sizeof(object::coff_file_header))
+ return make_error<JITLinkError>("Truncated COFF buffer");
+
+ const object::coff_file_header *COFFHeader =
+ reinterpret_cast<const object::coff_file_header *>(Data.data() + CurPtr);
+ const object::coff_bigobj_file_header *COFFBigObjHeader = nullptr;
+
+ // Deal with bigobj file
+ if (!IsPE && COFFHeader->Machine == COFF::IMAGE_FILE_MACHINE_UNKNOWN &&
+ COFFHeader->NumberOfSections == uint16_t(0xffff) &&
+ Data.size() >= sizeof(object::coff_bigobj_file_header)) {
+ if (Data.size() < sizeof(object::coff_file_header)) {
+ return make_error<JITLinkError>("Truncated COFF buffer");
+ }
+ COFFBigObjHeader =
+ reinterpret_cast<const object::coff_bigobj_file_header *>(Data.data() +
+ CurPtr);
+
+ // Verify that we are dealing with bigobj.
+ if (COFFBigObjHeader->Version >= COFF::BigObjHeader::MinBigObjectVersion &&
+ std::memcmp(COFFBigObjHeader->UUID, COFF::BigObjMagic,
+ sizeof(COFF::BigObjMagic)) == 0) {
+ COFFHeader = nullptr;
+ CurPtr += sizeof(object::coff_bigobj_file_header);
+ } else
+ COFFBigObjHeader = nullptr;
+ }
+
+ uint16_t Machine =
+ COFFHeader ? COFFHeader->Machine : COFFBigObjHeader->Machine;
+ LLVM_DEBUG({
+ dbgs() << "jitLink_COFF: PE = " << (IsPE ? "yes" : "no")
+ << ", bigobj = " << (COFFBigObjHeader ? "yes" : "no")
+ << ", identifier = \"" << ObjectBuffer.getBufferIdentifier() << "\" "
+ << "machine = " << getMachineName(Machine) << "\n";
+ });
+
+ switch (Machine) {
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return createLinkGraphFromCOFFObject_x86_64(ObjectBuffer);
+ default:
+ return make_error<JITLinkError>(
+ "Unsupported target machine architecture in COFF object " +
+ ObjectBuffer.getBufferIdentifier() + ": " + getMachineName(Machine));
+ }
+}
+
+void link_COFF(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ switch (G->getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ link_COFF_x86_64(std::move(G), std::move(Ctx));
+ return;
+ default:
+ Ctx->notifyFailed(make_error<JITLinkError>(
+ "Unsupported target machine architecture in COFF link graph " +
+ G->getName()));
+ return;
+ }
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
new file mode 100644
index 000000000000..43b9c2ba400b
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
@@ -0,0 +1,527 @@
+//===------- COFFLinkGraphBuilder.cpp - COFF LinkGraph builder ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic COFF LinkGraph building code.
+//
+//===----------------------------------------------------------------------===//
+#include "COFFLinkGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+static const char *CommonSectionName = "__common";
+
+namespace llvm {
+namespace jitlink {
+
+COFFLinkGraphBuilder::COFFLinkGraphBuilder(
+ const object::COFFObjectFile &Obj, Triple TT,
+ LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
+ : Obj(Obj),
+ G(std::make_unique<LinkGraph>(
+ Obj.getFileName().str(), Triple(std::move(TT)), getPointerSize(Obj),
+ getEndianness(Obj), std::move(GetEdgeKindName))) {
+ LLVM_DEBUG({
+ dbgs() << "Created COFFLinkGraphBuilder for \"" << Obj.getFileName()
+ << "\"\n";
+ });
+}
+
+COFFLinkGraphBuilder::~COFFLinkGraphBuilder() = default;
+
+unsigned
+COFFLinkGraphBuilder::getPointerSize(const object::COFFObjectFile &Obj) {
+ return Obj.getBytesInAddress();
+}
+
+support::endianness
+COFFLinkGraphBuilder::getEndianness(const object::COFFObjectFile &Obj) {
+ return Obj.isLittleEndian() ? support::little : support::big;
+}
+
+uint64_t COFFLinkGraphBuilder::getSectionSize(const object::COFFObjectFile &Obj,
+ const object::coff_section *Sec) {
+  // Consider the difference between executable form and object form; see
+  // COFFObjectFile::getSectionSize for more information.
+ if (Obj.getDOSHeader())
+ return std::min(Sec->VirtualSize, Sec->SizeOfRawData);
+ return Sec->SizeOfRawData;
+}
+
+uint64_t
+COFFLinkGraphBuilder::getSectionAddress(const object::COFFObjectFile &Obj,
+ const object::coff_section *Section) {
+ return Section->VirtualAddress + Obj.getImageBase();
+}
+
+bool COFFLinkGraphBuilder::isComdatSection(
+ const object::coff_section *Section) {
+ return Section->Characteristics & COFF::IMAGE_SCN_LNK_COMDAT;
+}
+
+Section &COFFLinkGraphBuilder::getCommonSection() {
+ if (!CommonSection)
+ CommonSection =
+ &G->createSection(CommonSectionName, MemProt::Read | MemProt::Write);
+ return *CommonSection;
+}
+
+Expected<std::unique_ptr<LinkGraph>> COFFLinkGraphBuilder::buildGraph() {
+ if (!Obj.isRelocatableObject())
+ return make_error<JITLinkError>("Object is not a relocatable COFF file");
+
+ if (auto Err = graphifySections())
+ return std::move(Err);
+
+ if (auto Err = graphifySymbols())
+ return std::move(Err);
+
+ if (auto Err = addRelocations())
+ return std::move(Err);
+
+ return std::move(G);
+}
+
+StringRef
+COFFLinkGraphBuilder::getCOFFSectionName(COFFSectionIndex SectionIndex,
+ const object::coff_section *Sec,
+ object::COFFSymbolRef Sym) {
+ switch (SectionIndex) {
+ case COFF::IMAGE_SYM_UNDEFINED: {
+    if (Sym.getValue())
+      return "(common)";
+    return "(external)";
+ }
+ case COFF::IMAGE_SYM_ABSOLUTE:
+ return "(absolute)";
+ case COFF::IMAGE_SYM_DEBUG: {
+ // Used with .file symbol
+ return "(debug)";
+ }
+ default: {
+    // Non-reserved regular section numbers.
+ if (Expected<StringRef> SecNameOrErr = Obj.getSectionName(Sec))
+ return *SecNameOrErr;
+ }
+ }
+ return "";
+}
+
+Error COFFLinkGraphBuilder::graphifySections() {
+ LLVM_DEBUG(dbgs() << " Creating graph sections...\n");
+
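+  // COFF section numbers are 1-based, so reserve index 0 as unused.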
+ GraphBlocks.resize(Obj.getNumberOfSections() + 1);
+ // For each section...
+ for (COFFSectionIndex SecIndex = 1;
+ SecIndex <= static_cast<COFFSectionIndex>(Obj.getNumberOfSections());
+ SecIndex++) {
+ Expected<const object::coff_section *> Sec = Obj.getSection(SecIndex);
+ if (!Sec)
+ return Sec.takeError();
+
+ StringRef SectionName;
+ if (Expected<StringRef> SecNameOrErr = Obj.getSectionName(*Sec))
+ SectionName = *SecNameOrErr;
+
+ bool IsDiscardable =
+ (*Sec)->Characteristics &
+ (COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_LNK_INFO);
+ if (IsDiscardable) {
+ LLVM_DEBUG(dbgs() << " " << SecIndex << ": \"" << SectionName
+ << "\" is discardable: "
+ "No graph section will be created.\n");
+ continue;
+ }
+
+ // FIXME: Skip debug info sections
+
+ LLVM_DEBUG({
+ dbgs() << " "
+ << "Creating section for \"" << SectionName << "\"\n";
+ });
+
+ // Get the section's memory protection flags.
+ MemProt Prot = MemProt::None;
+ if ((*Sec)->Characteristics & COFF::IMAGE_SCN_MEM_EXECUTE)
+ Prot |= MemProt::Exec;
+ if ((*Sec)->Characteristics & COFF::IMAGE_SCN_MEM_READ)
+ Prot |= MemProt::Read;
+ if ((*Sec)->Characteristics & COFF::IMAGE_SCN_MEM_WRITE)
+ Prot |= MemProt::Write;
+
+ // Look for existing sections first.
+ auto *GraphSec = G->findSectionByName(SectionName);
+ if (!GraphSec)
+ GraphSec = &G->createSection(SectionName, Prot);
+ if (GraphSec->getMemProt() != Prot)
+ return make_error<JITLinkError>("MemProt should match");
+
+ Block *B = nullptr;
+ if ((*Sec)->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ B = &G->createZeroFillBlock(
+ *GraphSec, getSectionSize(Obj, *Sec),
+ orc::ExecutorAddr(getSectionAddress(Obj, *Sec)),
+ (*Sec)->getAlignment(), 0);
+ else {
+ ArrayRef<uint8_t> Data;
+ if (auto Err = Obj.getSectionContents(*Sec, Data))
+ return Err;
+
+ B = &G->createContentBlock(
+ *GraphSec,
+ ArrayRef<char>(reinterpret_cast<const char *>(Data.data()),
+ Data.size()),
+ orc::ExecutorAddr(getSectionAddress(Obj, *Sec)),
+ (*Sec)->getAlignment(), 0);
+ }
+
+ setGraphBlock(SecIndex, B);
+ }
+
+ return Error::success();
+}
+
+Error COFFLinkGraphBuilder::graphifySymbols() {
+ LLVM_DEBUG(dbgs() << " Creating graph symbols...\n");
+
+ SymbolSets.resize(Obj.getNumberOfSections() + 1);
+ GraphSymbols.resize(Obj.getNumberOfSymbols());
+
+ for (COFFSymbolIndex SymIndex = 0;
+ SymIndex < static_cast<COFFSymbolIndex>(Obj.getNumberOfSymbols());
+ SymIndex++) {
+ Expected<object::COFFSymbolRef> Sym = Obj.getSymbol(SymIndex);
+ if (!Sym)
+ return Sym.takeError();
+
+ StringRef SymbolName;
+ if (Expected<StringRef> SymNameOrErr = Obj.getSymbolName(*Sym))
+ SymbolName = *SymNameOrErr;
+
+ COFFSectionIndex SectionIndex = Sym->getSectionNumber();
+ const object::coff_section *Sec = nullptr;
+
+ if (!COFF::isReservedSectionNumber(SectionIndex)) {
+ auto SecOrErr = Obj.getSection(SectionIndex);
+ if (!SecOrErr)
+        return make_error<JITLinkError>(
+            "Invalid COFF section number: " + formatv("{0:d}", SectionIndex) +
+            " (" + toString(SecOrErr.takeError()) + ")");
+ Sec = *SecOrErr;
+ }
+
+ // Create jitlink symbol
+ jitlink::Symbol *GSym = nullptr;
+ if (Sym->isFileRecord())
+ LLVM_DEBUG({
+ dbgs() << " " << SymIndex << ": Skipping FileRecord symbol \""
+ << SymbolName << "\" in "
+ << getCOFFSectionName(SectionIndex, Sec, *Sym)
+ << " (index: " << SectionIndex << ") \n";
+ });
+ else if (Sym->isUndefined()) {
+ LLVM_DEBUG({
+ dbgs() << " " << SymIndex
+ << ": Creating external graph symbol for COFF symbol \""
+ << SymbolName << "\" in "
+ << getCOFFSectionName(SectionIndex, Sec, *Sym)
+ << " (index: " << SectionIndex << ") \n";
+ });
+ GSym =
+ &G->addExternalSymbol(SymbolName, Sym->getValue(), Linkage::Strong);
+ } else if (Sym->isWeakExternal()) {
+ COFFSymbolIndex TagIndex =
+ Sym->getAux<object::coff_aux_weak_external>()->TagIndex;
+ assert(Sym->getAux<object::coff_aux_weak_external>()->Characteristics !=
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY &&
+ "IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY is not supported.");
+ assert(Sym->getAux<object::coff_aux_weak_external>()->Characteristics !=
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY &&
+ "IMAGE_WEAK_EXTERN_SEARCH_LIBRARY is not supported.");
+ WeakAliasRequests.push_back({SymIndex, TagIndex, SymbolName});
+ } else {
+ Expected<jitlink::Symbol *> NewGSym =
+ createDefinedSymbol(SymIndex, SymbolName, *Sym, Sec);
+ if (!NewGSym)
+ return NewGSym.takeError();
+ GSym = *NewGSym;
+ if (GSym) {
+ LLVM_DEBUG({
+ dbgs() << " " << SymIndex
+ << ": Creating defined graph symbol for COFF symbol \""
+ << SymbolName << "\" in "
+ << getCOFFSectionName(SectionIndex, Sec, *Sym)
+ << " (index: " << SectionIndex << ") \n";
+ dbgs() << " " << *GSym << "\n";
+ });
+ }
+ }
+
+ // Register the symbol
+ if (GSym)
+ setGraphSymbol(SectionIndex, SymIndex, *GSym);
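+    // Skip the auxiliary symbol records that follow this symbol.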
+ SymIndex += Sym->getNumberOfAuxSymbols();
+ }
+
+ if (auto Err = flushWeakAliasRequests())
+ return Err;
+
+ if (auto Err = calculateImplicitSizeOfSymbols())
+ return Err;
+
+ return Error::success();
+}
+
+Error COFFLinkGraphBuilder::flushWeakAliasRequests() {
+  // Export each weak external symbol as an alias of its target.
+ for (auto &WeakAlias : WeakAliasRequests) {
+ if (auto *Target = getGraphSymbol(WeakAlias.Target)) {
+ Expected<object::COFFSymbolRef> AliasSymbol =
+ Obj.getSymbol(WeakAlias.Alias);
+ if (!AliasSymbol)
+ return AliasSymbol.takeError();
+
+ // FIXME: Support this when there's a way to handle this.
+ if (!Target->isDefined())
+ return make_error<JITLinkError>("Weak external symbol with external "
+ "symbol as alternative not supported.");
+
+ jitlink::Symbol *NewSymbol = &G->addDefinedSymbol(
+ Target->getBlock(), Target->getOffset(), WeakAlias.SymbolName,
+ Target->getSize(), Linkage::Weak, Scope::Default,
+ Target->isCallable(), false);
+ setGraphSymbol(AliasSymbol->getSectionNumber(), WeakAlias.Alias,
+ *NewSymbol);
+ LLVM_DEBUG({
+ dbgs() << " " << WeakAlias.Alias
+ << ": Creating weak external symbol for COFF symbol \""
+ << WeakAlias.SymbolName << "\" in section "
+ << AliasSymbol->getSectionNumber() << "\n";
+ dbgs() << " " << *NewSymbol << "\n";
+ });
+ } else
+ return make_error<JITLinkError>("Weak symbol alias requested but actual "
+ "symbol not found for symbol " +
+ formatv("{0:d}", WeakAlias.Alias));
+ }
+ return Error::success();
+}
+
+// In COFF, most defined symbols do not carry size information, so we
+// calculate the "implicit" size of a symbol from the delta between the
+// offsets of consecutive symbols within a block. We maintain a balanced-tree
+// set of symbols sorted by offset for each block, so that sorted insertion
+// takes logarithmic time. A symbol is inserted into its set as it is
+// processed in graphifySymbols. Here we iterate over the collected symbols
+// in sorted order and calculate the implicit sizes.
+Error COFFLinkGraphBuilder::calculateImplicitSizeOfSymbols() {
+ for (COFFSectionIndex SecIndex = 1;
+ SecIndex <= static_cast<COFFSectionIndex>(Obj.getNumberOfSections());
+ SecIndex++) {
+ auto &SymbolSet = SymbolSets[SecIndex];
+ jitlink::Block *B = getGraphBlock(SecIndex);
+ orc::ExecutorAddrDiff LastOffset = B->getSize();
+ orc::ExecutorAddrDiff LastDifferentOffset = B->getSize();
+ orc::ExecutorAddrDiff LastSize = 0;
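+    // Walk the symbols from the highest offset down: each symbol's implicit
+    // size is the gap up to the previous (higher) offset, or up to the end
+    // of the block for the last symbol.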
+ for (auto It = SymbolSet.rbegin(); It != SymbolSet.rend(); It++) {
+ orc::ExecutorAddrDiff Offset = It->first;
+ jitlink::Symbol *Symbol = It->second;
+ orc::ExecutorAddrDiff CandSize;
+      // The last offset can repeat when symbols alias one another.
+ if (Symbol->getOffset() == LastOffset)
+ CandSize = LastSize;
+ else
+ CandSize = LastOffset - Offset;
+
+ LLVM_DEBUG({
+ if (Offset + Symbol->getSize() > LastDifferentOffset)
+ dbgs() << " Overlapping symbol range generated for the following "
+ "symbol:"
+ << "\n"
+ << " " << *Symbol << "\n";
+ });
+ (void)LastDifferentOffset;
+ if (LastOffset != Offset)
+ LastDifferentOffset = Offset;
+ LastSize = CandSize;
+ LastOffset = Offset;
+ if (Symbol->getSize()) {
+        // A nonzero size can already be present for COMDAT symbols. We do
+        // not worry about overlapping symbol ranges introduced by a mismatch
+        // between the inferred and the declared symbol size: size information
+        // is currently only used by jitlink-check, where we control the
+        // inputs and avoid overlapping ranges.
+ continue;
+ }
+
+ LLVM_DEBUG({
+ if (!CandSize)
+ dbgs() << " Empty implicit symbol size generated for the following "
+ "symbol:"
+ << "\n"
+ << " " << *Symbol << "\n";
+ });
+
+ Symbol->setSize(CandSize);
+ }
+ }
+ return Error::success();
+}
+
+Expected<Symbol *> COFFLinkGraphBuilder::createDefinedSymbol(
+ COFFSymbolIndex SymIndex, StringRef SymbolName,
+ object::COFFSymbolRef Symbol, const object::coff_section *Section) {
+ if (Symbol.isCommon()) {
+ // FIXME: correct alignment
+ return &G->addCommonSymbol(SymbolName, Scope::Default, getCommonSection(),
+ orc::ExecutorAddr(), Symbol.getValue(),
+ Symbol.getValue(), false);
+ }
+ if (Symbol.isAbsolute())
+ return &G->addAbsoluteSymbol(SymbolName,
+ orc::ExecutorAddr(Symbol.getValue()), 0,
+ Linkage::Strong, Scope::Local, false);
+
+ if (llvm::COFF::isReservedSectionNumber(Symbol.getSectionNumber()))
+ return make_error<JITLinkError>(
+ "Reserved section number used in regular symbol " +
+ formatv("{0:d}", SymIndex));
+
+ Block *B = getGraphBlock(Symbol.getSectionNumber());
+ if (Symbol.isExternal()) {
+    // This is not a COMDAT sequence; export the symbol as is.
+ if (!isComdatSection(Section))
+ return &G->addDefinedSymbol(
+ *B, Symbol.getValue(), SymbolName, 0, Linkage::Strong, Scope::Default,
+ Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
+ else {
+ if (!PendingComdatExport)
+ return make_error<JITLinkError>("No pending COMDAT export for symbol " +
+ formatv("{0:d}", SymIndex));
+ if (PendingComdatExport->SectionIndex != Symbol.getSectionNumber())
+ return make_error<JITLinkError>(
+ "COMDAT export section number mismatch for symbol " +
+ formatv("{0:d}", SymIndex));
+ return exportCOMDATSymbol(SymIndex, SymbolName, Symbol);
+ }
+ }
+
+ if (Symbol.getStorageClass() == COFF::IMAGE_SYM_CLASS_STATIC) {
+ const object::coff_aux_section_definition *Definition =
+ Symbol.getSectionDefinition();
+ if (!Definition || !isComdatSection(Section)) {
+ // Handle typical static symbol
+ return &G->addDefinedSymbol(
+ *B, Symbol.getValue(), SymbolName, 0, Linkage::Strong, Scope::Local,
+ Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
+ }
+ if (Definition->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+ // FIXME: don't dead strip this when parent section is alive
+ return &G->addDefinedSymbol(
+ *B, Symbol.getValue(), SymbolName, 0, Linkage::Strong, Scope::Local,
+ Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION, false);
+ }
+ if (PendingComdatExport)
+ return make_error<JITLinkError>(
+ "COMDAT export request already exists before symbol " +
+ formatv("{0:d}", SymIndex));
+ return createCOMDATExportRequest(SymIndex, Symbol, Definition);
+ }
+ return make_error<JITLinkError>("Unsupported storage class " +
+ formatv("{0:d}", Symbol.getStorageClass()) +
+ " in symbol " + formatv("{0:d}", SymIndex));
+}
+
+// COMDAT handling:
+// When the IMAGE_SCN_LNK_COMDAT flag is set in a section's characteristics,
+// the section is called a COMDAT section. It contains two symbols in a
+// sequence that specifies the behavior. The first is the section symbol,
+// which carries the size and name of the section as well as the selection
+// type that specifies how duplicates of the symbol are handled. The second
+// is the COMDAT symbol, which usually defines the external name and data
+// type.
+//
+// Since the two symbols always come in this order, we initiate a pending
+// COMDAT export request when we encounter the first symbol and actually
+// export it when we process the second.
+//
+// Process the first symbol of a COMDAT sequence.
+Expected<Symbol *> COFFLinkGraphBuilder::createCOMDATExportRequest(
+ COFFSymbolIndex SymIndex, object::COFFSymbolRef Symbol,
+ const object::coff_aux_section_definition *Definition) {
+ Block *B = getGraphBlock(Symbol.getSectionNumber());
+ Linkage L = Linkage::Strong;
+ switch (Definition->Selection) {
+ case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: {
+ L = Linkage::Strong;
+ break;
+ }
+ case COFF::IMAGE_COMDAT_SELECT_ANY: {
+ L = Linkage::Weak;
+ break;
+ }
+ case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: {
+ // FIXME: Implement size/content validation when LinkGraph is able to
+ // handle this.
+ L = Linkage::Weak;
+ break;
+ }
+ case COFF::IMAGE_COMDAT_SELECT_LARGEST: {
+ // FIXME: Support IMAGE_COMDAT_SELECT_LARGEST when LinkGraph is able to
+ // handle this.
+ return make_error<JITLinkError>(
+ "IMAGE_COMDAT_SELECT_LARGEST is not supported.");
+ }
+ case COFF::IMAGE_COMDAT_SELECT_NEWEST: {
+ // Even link.exe doesn't support this selection properly.
+ return make_error<JITLinkError>(
+ "IMAGE_COMDAT_SELECT_NEWEST is not supported.");
+ }
+ default: {
+ return make_error<JITLinkError>("Invalid comdat selection type: " +
+ formatv("{0:d}", Definition->Selection));
+ }
+ }
+ PendingComdatExport = {SymIndex, Symbol.getSectionNumber(), L};
+ return &G->addAnonymousSymbol(*B, Symbol.getValue(), Definition->Length,
+ false, false);
+}
+
+// Process the second symbol of COMDAT sequence.
+Expected<Symbol *>
+COFFLinkGraphBuilder::exportCOMDATSymbol(COFFSymbolIndex SymIndex,
+ StringRef SymbolName,
+ object::COFFSymbolRef Symbol) {
+ COFFSymbolIndex TargetIndex = PendingComdatExport->SymbolIndex;
+ Linkage L = PendingComdatExport->Linkage;
+ jitlink::Symbol *Target = getGraphSymbol(TargetIndex);
+ assert(Target && "COMDAT leaader is invalid.");
+ assert((llvm::count_if(G->defined_symbols(),
+ [&](const jitlink::Symbol *Sym) {
+ return Sym->getName() == SymbolName;
+ }) == 0) &&
+ "Duplicate defined symbol");
+ Target->setName(SymbolName);
+ Target->setLinkage(L);
+ Target->setCallable(Symbol.getComplexType() ==
+ COFF::IMAGE_SYM_DTYPE_FUNCTION);
+ Target->setScope(Scope::Default);
+ LLVM_DEBUG({
+ dbgs() << " " << SymIndex
+ << ": Exporting COMDAT graph symbol for COFF symbol \"" << SymbolName
+ << "\" in section " << Symbol.getSectionNumber() << "\n";
+ dbgs() << " " << *Target << "\n";
+ });
+ PendingComdatExport = None;
+ return Target;
+}
+
+} // namespace jitlink
+} // namespace llvm
\ No newline at end of file
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
new file mode 100644
index 000000000000..4dc1b14dc4a2
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
@@ -0,0 +1,199 @@
+//===----- COFFLinkGraphBuilder.h - COFF LinkGraph builder ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic COFF LinkGraph building code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H
+#define LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Object/COFF.h"
+
+#include "EHFrameSupportImpl.h"
+#include "JITLinkGeneric.h"
+
+#define DEBUG_TYPE "jitlink"
+
+#include <list>
+
+namespace llvm {
+namespace jitlink {
+
+class COFFLinkGraphBuilder {
+public:
+ virtual ~COFFLinkGraphBuilder();
+ Expected<std::unique_ptr<LinkGraph>> buildGraph();
+
+protected:
+ using COFFSectionIndex = int32_t;
+ using COFFSymbolIndex = int32_t;
+
+ COFFLinkGraphBuilder(const object::COFFObjectFile &Obj, Triple TT,
+ LinkGraph::GetEdgeKindNameFunction GetEdgeKindName);
+
+ LinkGraph &getGraph() const { return *G; }
+
+ const object::COFFObjectFile &getObject() const { return Obj; }
+
+ virtual Error addRelocations() = 0;
+
+ Error graphifySections();
+ Error graphifySymbols();
+
+ void setGraphSymbol(COFFSectionIndex SecIndex, COFFSymbolIndex SymIndex,
+ Symbol &Sym) {
+ assert(!GraphSymbols[SymIndex] && "Duplicate symbol at index");
+ GraphSymbols[SymIndex] = &Sym;
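+    // Also record the symbol in the per-section offset-sorted set so that
+    // calculateImplicitSizeOfSymbols() can infer sizes later.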
+ if (!COFF::isReservedSectionNumber(SecIndex))
+ SymbolSets[SecIndex].insert({Sym.getOffset(), &Sym});
+ }
+
+ Symbol *getGraphSymbol(COFFSymbolIndex SymIndex) const {
+ if (SymIndex < 0 ||
+ SymIndex >= static_cast<COFFSymbolIndex>(GraphSymbols.size()))
+ return nullptr;
+ return GraphSymbols[SymIndex];
+ }
+
+ void setGraphBlock(COFFSectionIndex SecIndex, Block *B) {
+ assert(!GraphBlocks[SecIndex] && "Duplicate section at index");
+ assert(!COFF::isReservedSectionNumber(SecIndex) && "Invalid section index");
+ GraphBlocks[SecIndex] = B;
+ }
+
+ Block *getGraphBlock(COFFSectionIndex SecIndex) const {
+    if (SecIndex <= 0 ||
+        SecIndex >= static_cast<COFFSectionIndex>(GraphBlocks.size()))
+ return nullptr;
+ return GraphBlocks[SecIndex];
+ }
+
+ object::COFFObjectFile::section_iterator_range sections() const {
+ return Obj.sections();
+ }
+
+  /// Traverse all matching relocation records in the given section. The
+  /// handler function Func should be callable with this signature:
+  ///   Error(const object::RelocationRef &,
+  ///         const object::SectionRef &, Block &)
+ ///
+ template <typename RelocHandlerFunction>
+ Error forEachRelocation(const object::SectionRef &RelSec,
+ RelocHandlerFunction &&Func,
+ bool ProcessDebugSections = false);
+
+ /// Traverse all matching relocation records in the given section. Convenience
+ /// wrapper to allow passing a member function for the handler.
+ ///
+ template <typename ClassT, typename RelocHandlerMethod>
+ Error forEachRelocation(const object::SectionRef &RelSec, ClassT *Instance,
+ RelocHandlerMethod &&Method,
+ bool ProcessDebugSections = false) {
+ return forEachRelocation(
+ RelSec,
+ [Instance, Method](const auto &Rel, const auto &Target, auto &GS) {
+ return (Instance->*Method)(Rel, Target, GS);
+ },
+ ProcessDebugSections);
+ }
+
+private:
+ // Pending comdat symbol export that is initiated by the first symbol of
+ // COMDAT sequence.
+ struct ComdatExportRequest {
+ COFFSymbolIndex SymbolIndex;
+ COFFSectionIndex SectionIndex;
+ jitlink::Linkage Linkage;
+ };
+ Optional<ComdatExportRequest> PendingComdatExport;
+
+  // Represents a pending request to create a weak external symbol that
+  // aliases a target symbol under the given name.
+ struct WeakAliasRequest {
+ COFFSymbolIndex Alias;
+ COFFSymbolIndex Target;
+ StringRef SymbolName;
+ };
+ std::vector<WeakAliasRequest> WeakAliasRequests;
+
+ // Per COFF section jitlink symbol set sorted by offset.
+ // Used for calculating implicit size of defined symbols.
+ using SymbolSet = std::set<std::pair<orc::ExecutorAddrDiff, Symbol *>>;
+ std::vector<SymbolSet> SymbolSets;
+
+ Section &getCommonSection();
+
+ Expected<Symbol *> createDefinedSymbol(COFFSymbolIndex SymIndex,
+ StringRef SymbolName,
+ object::COFFSymbolRef Symbol,
+ const object::coff_section *Section);
+ Expected<Symbol *> createCOMDATExportRequest(
+ COFFSymbolIndex SymIndex, object::COFFSymbolRef Symbol,
+ const object::coff_aux_section_definition *Definition);
+ Expected<Symbol *> exportCOMDATSymbol(COFFSymbolIndex SymIndex,
+ StringRef SymbolName,
+ object::COFFSymbolRef Symbol);
+ Error flushWeakAliasRequests();
+ Error calculateImplicitSizeOfSymbols();
+
+ static uint64_t getSectionAddress(const object::COFFObjectFile &Obj,
+ const object::coff_section *Section);
+ static uint64_t getSectionSize(const object::COFFObjectFile &Obj,
+ const object::coff_section *Section);
+ static bool isComdatSection(const object::coff_section *Section);
+ static unsigned getPointerSize(const object::COFFObjectFile &Obj);
+ static support::endianness getEndianness(const object::COFFObjectFile &Obj);
+ StringRef getCOFFSectionName(COFFSectionIndex SectionIndex,
+ const object::coff_section *Sec,
+ object::COFFSymbolRef Sym);
+
+ const object::COFFObjectFile &Obj;
+ std::unique_ptr<LinkGraph> G;
+
+ Section *CommonSection = nullptr;
+ std::vector<Block *> GraphBlocks;
+ std::vector<Symbol *> GraphSymbols;
+};
+
+template <typename RelocHandlerFunction>
+Error COFFLinkGraphBuilder::forEachRelocation(const object::SectionRef &RelSec,
+ RelocHandlerFunction &&Func,
+ bool ProcessDebugSections) {
+
+ auto COFFRelSect = Obj.getCOFFSection(RelSec);
+
+ // Target sections have names in valid COFF object files.
+ Expected<StringRef> Name = Obj.getSectionName(COFFRelSect);
+ if (!Name)
+ return Name.takeError();
+ LLVM_DEBUG(dbgs() << " " << *Name << ":\n");
+
+ // Lookup the link-graph node corresponding to the target section name.
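+  // COFF section numbers are 1-based while SectionRef indices are 0-based,
+  // hence the +1.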
+ auto *BlockToFix = getGraphBlock(RelSec.getIndex() + 1);
+ if (!BlockToFix)
+ return make_error<StringError>(
+ "Referencing a section that wasn't added to the graph: " + *Name,
+ inconvertibleErrorCode());
+
+ // Let the callee process relocation entries one by one.
+ for (const auto &R : RelSec.relocations())
+ if (Error Err = Func(R, RelSec, *BlockToFix))
+ return Err;
+
+ LLVM_DEBUG(dbgs() << "\n");
+ return Error::success();
+}
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LIB_EXECUTIONENGINE_JITLINK_COFFLINKGRAPHBUILDER_H
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
new file mode 100644
index 000000000000..3d36ad1ed767
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
@@ -0,0 +1,216 @@
+//===----- COFF_x86_64.cpp - JIT linker implementation for COFF/x86_64 ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF/x86_64 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/COFF_x86_64.h"
+#include "COFFLinkGraphBuilder.h"
+#include "EHFrameSupportImpl.h"
+#include "JITLinkGeneric.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Endian.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+
+namespace {
+
+class COFFJITLinker_x86_64 : public JITLinker<COFFJITLinker_x86_64> {
+ friend class JITLinker<COFFJITLinker_x86_64>;
+
+public:
+ COFFJITLinker_x86_64(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G,
+ PassConfiguration PassConfig)
+ : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {}
+
+private:
+ Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
+ return x86_64::applyFixup(G, B, E, nullptr);
+ }
+};
+
+class COFFLinkGraphBuilder_x86_64 : public COFFLinkGraphBuilder {
+private:
+ uint64_t ImageBase = 0;
+ enum COFFX86RelocationKind {
+ COFFAddr32NB,
+ COFFRel32,
+ };
+
+ static Expected<COFFX86RelocationKind>
+ getRelocationKind(const uint32_t Type) {
+ switch (Type) {
+ case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_ADDR32NB:
+ return COFFAddr32NB;
+ case COFF::RelocationTypeAMD64::IMAGE_REL_AMD64_REL32:
+ return COFFRel32;
+ }
+
+ return make_error<JITLinkError>("Unsupported x86_64 relocation:" +
+ formatv("{0:d}", Type));
+ }
+
+ Error addRelocations() override {
+
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
+ for (const auto &RelSect : sections())
+ if (Error Err = COFFLinkGraphBuilder::forEachRelocation(
+ RelSect, this, &COFFLinkGraphBuilder_x86_64::addSingleRelocation))
+ return Err;
+
+ return Error::success();
+ }
+
+ uint64_t getImageBase() {
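+    // Relocatable COFF objects carry no image base, so approximate one as
+    // the lowest non-zero block address; ADDR32NB (RVA) fixups are rebased
+    // against this value.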
+ if (!ImageBase) {
+ ImageBase = std::numeric_limits<uint64_t>::max();
+ for (const auto &Block : getGraph().blocks()) {
+ if (Block->getAddress().getValue())
+ ImageBase = std::min(ImageBase, Block->getAddress().getValue());
+ }
+ }
+ return ImageBase;
+ }
+
+ Error addSingleRelocation(const object::RelocationRef &Rel,
+ const object::SectionRef &FixupSect,
+ Block &BlockToFix) {
+
+ const object::coff_relocation *COFFRel = getObject().getCOFFRelocation(Rel);
+ auto SymbolIt = Rel.getSymbol();
+ if (SymbolIt == getObject().symbol_end()) {
+ return make_error<StringError>(
+ formatv("Invalid symbol index in relocation entry. "
+ "index: {0}, section: {1}",
+ COFFRel->SymbolTableIndex, FixupSect.getIndex()),
+ inconvertibleErrorCode());
+ }
+
+ object::COFFSymbolRef COFFSymbol = getObject().getCOFFSymbol(*SymbolIt);
+ COFFSymbolIndex SymIndex = getObject().getSymbolIndex(COFFSymbol);
+
+ Symbol *GraphSymbol = getGraphSymbol(SymIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, section: {1}",
+ SymIndex, FixupSect.getIndex()),
+ inconvertibleErrorCode());
+
+ Expected<COFFX86RelocationKind> RelocKind =
+ getRelocationKind(Rel.getType());
+ if (!RelocKind)
+ return RelocKind.takeError();
+
+ int64_t Addend = 0;
+ orc::ExecutorAddr FixupAddress =
+ orc::ExecutorAddr(FixupSect.getAddress()) + Rel.getOffset();
+ Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
+
+ Edge::Kind Kind = Edge::Invalid;
+
+ switch (*RelocKind) {
+ case COFFAddr32NB: {
+ Kind = x86_64::Pointer32;
+ Offset -= getImageBase();
+ break;
+ }
+ case COFFRel32: {
+ Kind = x86_64::BranchPCRel32;
+ break;
+ }
+ };
+
+ Edge GE(Kind, Offset, *GraphSymbol, Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), BlockToFix, GE, x86_64::getEdgeKindName(Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix.addEdge(std::move(GE));
+ return Error::success();
+ }
+
+ /// Return the string name of the given COFF x86_64 edge kind.
+ const char *getCOFFX86RelocationKindName(COFFX86RelocationKind R) {
+ switch (R) {
+ case COFFAddr32NB:
+ return "COFFAddr32NB";
+ case COFFRel32:
+ return "COFFRel32";
+ }
+ }
+
+public:
+  COFFLinkGraphBuilder_x86_64(const object::COFFObjectFile &Obj, Triple T)
+      : COFFLinkGraphBuilder(Obj, std::move(T), x86_64::getEdgeKindName) {}
+};
+
+Error buildTables_COFF_x86_64(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+
+ x86_64::GOTTableManager GOT;
+ x86_64::PLTTableManager PLT(GOT);
+ visitExistingEdges(G, GOT, PLT);
+ return Error::success();
+}
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromCOFFObject_x86_64(MemoryBufferRef ObjectBuffer) {
+ LLVM_DEBUG({
+ dbgs() << "Building jitlink graph for new input "
+ << ObjectBuffer.getBufferIdentifier() << "...\n";
+ });
+
+ auto COFFObj = object::ObjectFile::createCOFFObjectFile(ObjectBuffer);
+ if (!COFFObj)
+ return COFFObj.takeError();
+
+ return COFFLinkGraphBuilder_x86_64(**COFFObj, (*COFFObj)->makeTriple())
+ .buildGraph();
+}
+
+void link_COFF_x86_64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ PassConfiguration Config;
+ const Triple &TT = G->getTargetTriple();
+ if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ // Add a mark-live pass.
+ if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ Config.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ Config.PrePrunePasses.push_back(markAllSymbolsLive);
+
+ // Add an in-place GOT/Stubs/TLSInfoEntry build pass.
+ Config.PostPrunePasses.push_back(buildTables_COFF_x86_64);
+
+ // Add GOT/Stubs optimizer pass.
+ Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses);
+ }
+
+ if (auto Err = Ctx->modifyPassConfig(*G, Config))
+ return Ctx->notifyFailed(std::move(Err));
+
+ COFFJITLinker_x86_64::link(std::move(Ctx), std::move(G), std::move(Config));
+}
+
+} // namespace jitlink
+} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index b1492cd74508..389fd14c0f29 100644
--- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -33,7 +33,7 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
if (!EHFrame) {
LLVM_DEBUG({
dbgs() << "EHFrameEdgeFixer: No " << EHFrameSectionName
- << " section. Nothing to do\n";
+ << " section in \"" << G.getName() << "\". Nothing to do.\n";
});
return Error::success();
}
@@ -44,7 +44,8 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
"EHFrameEdgeFixer only supports 32 and 64 bit targets");
LLVM_DEBUG({
- dbgs() << "EHFrameEdgeFixer: Processing " << EHFrameSectionName << "...\n";
+ dbgs() << "EHFrameEdgeFixer: Processing " << EHFrameSectionName << " in \""
+ << G.getName() << "\"...\n";
});
ParseContext PC(G);
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
index 98da3f155c3e..7d67e5ef343a 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
@@ -63,6 +63,10 @@ private:
ELFPrel64,
ELFAdrGOTPage21,
ELFLd64GOTLo12,
+ ELFTLSDescAdrPage21,
+ ELFTLSDescAddLo12,
+ ELFTLSDescLd64Lo12,
+ ELFTLSDescCall,
};
static Expected<ELFAArch64RelocationKind>
@@ -104,6 +108,14 @@ private:
return ELFAdrGOTPage21;
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
return ELFLd64GOTLo12;
+ case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
+ return ELFTLSDescAdrPage21;
+ case ELF::R_AARCH64_TLSDESC_ADD_LO12:
+ return ELFTLSDescAddLo12;
+ case ELF::R_AARCH64_TLSDESC_LD64_LO12:
+ return ELFTLSDescLd64Lo12;
+ case ELF::R_AARCH64_TLSDESC_CALL:
+ return ELFTLSDescCall;
}
return make_error<JITLinkError>(
@@ -292,6 +304,21 @@ private:
Kind = aarch64::GOTPageOffset12;
break;
}
+ case ELFTLSDescAdrPage21: {
+ Kind = aarch64::TLSDescPage21;
+ break;
+ }
+ case ELFTLSDescAddLo12: {
+ Kind = aarch64::TLSDescPageOffset12;
+ break;
+ }
+ case ELFTLSDescLd64Lo12: {
+ Kind = aarch64::TLSDescPageOffset12;
+ break;
+ }
+ case ELFTLSDescCall: {
+ return Error::success();
+ }
};
Edge GE(Kind, Offset, *GraphSymbol, Addend);
@@ -302,6 +329,7 @@ private:
});
BlockToFix.addEdge(std::move(GE));
+
return Error::success();
}
@@ -342,6 +370,14 @@ private:
return "ELFAdrGOTPage21";
case ELFLd64GOTLo12:
return "ELFLd64GOTLo12";
+ case ELFTLSDescAdrPage21:
+ return "ELFTLSDescAdrPage21";
+ case ELFTLSDescAddLo12:
+ return "ELFTLSDescAddLo12";
+ case ELFTLSDescLd64Lo12:
+ return "ELFTLSDescLd64Lo12";
+ case ELFTLSDescCall:
+ return "ELFTLSDescCall";
default:
return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
}
@@ -354,12 +390,133 @@ public:
aarch64::getEdgeKindName) {}
};
+// TLS Info Builder.
+class TLSInfoTableManager_ELF_aarch64
+ : public TableManager<TLSInfoTableManager_ELF_aarch64> {
+public:
+ static StringRef getSectionName() { return "$__TLSINFO"; }
+
+ static const uint8_t TLSInfoEntryContent[16];
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) { return false; }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+    // The TLS Info entry's key value will be written by the
+    // fixTLVSectionByName pass, so create mutable content.
+ auto &TLSInfoEntry = G.createMutableContentBlock(
+ getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()),
+ orc::ExecutorAddr(), 8, 0);
+ TLSInfoEntry.addEdge(aarch64::Pointer64, 8, Target, 0);
+ return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false);
+ }
+
+private:
+ Section &getTLSInfoSection(LinkGraph &G) {
+ if (!TLSInfoTable)
+ TLSInfoTable = &G.createSection(getSectionName(), MemProt::Read);
+ return *TLSInfoTable;
+ }
+
+ ArrayRef<char> getTLSInfoEntryContent() const {
+ return {reinterpret_cast<const char *>(TLSInfoEntryContent),
+ sizeof(TLSInfoEntryContent)};
+ }
+
+ Section *TLSInfoTable = nullptr;
+};
+
+const uint8_t TLSInfoTableManager_ELF_aarch64::TLSInfoEntryContent[16] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*pthread key */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /*data address*/
+};
+
+// TLS Descriptor Builder.
+class TLSDescTableManager_ELF_aarch64
+ : public TableManager<TLSDescTableManager_ELF_aarch64> {
+public:
+ TLSDescTableManager_ELF_aarch64(
+ TLSInfoTableManager_ELF_aarch64 &TLSInfoTableManager)
+ : TLSInfoTableManager(TLSInfoTableManager) {}
+
+ static StringRef getSectionName() { return "$__TLSDESC"; }
+
+ static const uint8_t TLSDescEntryContent[16];
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
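+    // Lower TLSDesc page/pageoffset edges to plain Page21/PageOffset12 edges
+    // that address a synthesized TLS descriptor entry; all other edge kinds
+    // are left untouched.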
+ Edge::Kind KindToSet = Edge::Invalid;
+ switch (E.getKind()) {
+ case aarch64::TLSDescPage21: {
+ KindToSet = aarch64::Page21;
+ break;
+ }
+ case aarch64::TLSDescPageOffset12: {
+ KindToSet = aarch64::PageOffset12;
+ break;
+ }
+ default:
+ return false;
+ }
+ assert(KindToSet != Edge::Invalid &&
+ "Fell through switch, but no new kind to set");
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setKind(KindToSet);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
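+    // A TLS descriptor entry holds two pointers: the resolver function at
+    // offset 0 and the per-symbol TLS info entry at offset 8.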
+ auto &EntryBlock =
+ G.createContentBlock(getTLSDescSection(G), getTLSDescBlockContent(),
+ orc::ExecutorAddr(), 8, 0);
+ EntryBlock.addEdge(aarch64::Pointer64, 0, getTLSDescResolver(G), 0);
+ EntryBlock.addEdge(aarch64::Pointer64, 8,
+ TLSInfoTableManager.getEntryForTarget(G, Target), 0);
+ return G.addAnonymousSymbol(EntryBlock, 0, 8, false, false);
+ }
+
+private:
+ Section &getTLSDescSection(LinkGraph &G) {
+ if (!GOTSection)
+ GOTSection = &G.createSection(getSectionName(), MemProt::Read);
+ return *GOTSection;
+ }
+
+ Symbol &getTLSDescResolver(LinkGraph &G) {
+ if (!TLSDescResolver)
+ TLSDescResolver =
+ &G.addExternalSymbol("__tlsdesc_resolver", 8, Linkage::Strong);
+ return *TLSDescResolver;
+ }
+
+ ArrayRef<char> getTLSDescBlockContent() {
+ return {reinterpret_cast<const char *>(TLSDescEntryContent),
+ sizeof(TLSDescEntryContent)};
+ }
+
+ Section *GOTSection = nullptr;
+ Symbol *TLSDescResolver = nullptr;
+ TLSInfoTableManager_ELF_aarch64 &TLSInfoTableManager;
+};
+
+const uint8_t TLSDescTableManager_ELF_aarch64::TLSDescEntryContent[16] = {
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, /*resolver function pointer*/
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00 /*pointer to tls info*/
+};
+
Error buildTables_ELF_aarch64(LinkGraph &G) {
LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
aarch64::GOTTableManager GOT;
aarch64::PLTTableManager PLT(GOT);
- visitExistingEdges(G, GOT, PLT);
+ TLSInfoTableManager_ELF_aarch64 TLSInfo;
+ TLSDescTableManager_ELF_aarch64 TLSDesc(TLSInfo);
+ visitExistingEdges(G, GOT, PLT, TLSDesc, TLSInfo);
return Error::success();
}
@@ -406,7 +563,7 @@ void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
- // Add an in-place GOT/Stubs build pass.
+ // Add an in-place GOT/TLS/Stubs build pass.
Config.PostPrunePasses.push_back(buildTables_ELF_aarch64);
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index 197ab71f5274..c7596efe2bb8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -494,6 +494,30 @@ private:
Block &BlockToFix) {
using Base = ELFLinkGraphBuilder<ELFT>;
+ uint32_t Type = Rel.getType(false);
+ // We do not implement linker relaxation, except what is required for
+ // alignment (see below).
+ if (Type == llvm::ELF::R_RISCV_RELAX)
+ return Error::success();
+
+ int64_t Addend = Rel.r_addend;
+ if (Type == llvm::ELF::R_RISCV_ALIGN) {
+ uint64_t Alignment = PowerOf2Ceil(Addend);
+    // FIXME: Implement support for ensuring alignment together with linker
+    // relaxation; an alignment of 2 bytes is already guaranteed by the
+    // length of compressed instructions, so it needs no action on our side.
+ if (Alignment > 2)
+ return make_error<JITLinkError>(
+ formatv("Unsupported relocation R_RISCV_ALIGN with alignment {0} "
+ "larger than 2 (addend: {1})",
+ Alignment, Addend));
+ return Error::success();
+ }
+
+ Expected<riscv::EdgeKind_riscv> Kind = getRelocationKind(Type);
+ if (!Kind)
+ return Kind.takeError();
+
uint32_t SymbolIndex = Rel.getSymbol(false);
auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
if (!ObjSymbol)
@@ -508,12 +532,6 @@ private:
Base::GraphSymbols.size()),
inconvertibleErrorCode());
- uint32_t Type = Rel.getType(false);
- Expected<riscv::EdgeKind_riscv> Kind = getRelocationKind(Type);
- if (!Kind)
- return Kind.takeError();
-
- int64_t Addend = Rel.r_addend;
auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset;
Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
Edge GE(*Kind, Offset, *GraphSymbol, Addend);
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index 43efe0725cfe..08fdc7c9e6b1 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -9,10 +9,10 @@
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/ExecutionEngine/JITLink/COFF.h"
#include "llvm/ExecutionEngine/JITLink/ELF.h"
#include "llvm/ExecutionEngine/JITLink/MachO.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@@ -41,8 +41,6 @@ public:
}
};
-static ManagedStatic<JITLinkerErrorCategory> JITLinkerErrorCategory;
-
} // namespace
namespace llvm {
@@ -53,7 +51,8 @@ char JITLinkError::ID = 0;
void JITLinkError::log(raw_ostream &OS) const { OS << ErrMsg; }
std::error_code JITLinkError::convertToErrorCode() const {
- return std::error_code(GenericJITLinkError, *JITLinkerErrorCategory);
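+  // Construct the category lazily on first use (function-local static),
+  // replacing the removed ManagedStatic.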
+ static JITLinkerErrorCategory TheJITLinkerErrorCategory;
+ return std::error_code(GenericJITLinkError, TheJITLinkerErrorCategory);
}
const char *getGenericEdgeKindName(Edge::Kind K) {
@@ -410,6 +409,8 @@ createLinkGraphFromObject(MemoryBufferRef ObjectBuffer) {
return createLinkGraphFromMachOObject(ObjectBuffer);
case file_magic::elf_relocatable:
return createLinkGraphFromELFObject(ObjectBuffer);
+ case file_magic::coff_object:
+ return createLinkGraphFromCOFFObject(ObjectBuffer);
default:
return make_error<JITLinkError>("Unsupported file format");
};
@@ -421,6 +422,8 @@ void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx) {
return link_MachO(std::move(G), std::move(Ctx));
case Triple::ELF:
return link_ELF(std::move(G), std::move(Ctx));
+ case Triple::COFF:
+ return link_COFF(std::move(G), std::move(Ctx));
default:
Ctx->notifyFailed(make_error<JITLinkError>("Unsupported object format"));
};
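Dropping ManagedStatic in favour of a function-local static is a recurring theme of this commit: since C++11 a block-scoped static is initialized exactly once, thread-safely, on first use, which is all an error category ever needed. A generic sketch of the pattern (category name and message are placeholders):

#include <string>
#include <system_error>

class DemoErrorCategory : public std::error_category {
public:
  const char *name() const noexcept override { return "demo"; }
  std::string message(int) const override { return "demo error"; }
};

std::error_code makeDemoError(int Code) {
  // Constructed on first call, thread-safely, never torn down early.
  static DemoErrorCategory TheCategory;
  return std::error_code(Code, TheCategory);
}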
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index dd50314d3ed7..04194318498f 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -425,7 +425,7 @@ private:
else
return TargetSymbolOrErr.takeError();
- Kind = aarch64::PointerToGOT;
+ Kind = aarch64::Delta32ToGOT;
break;
case MachODelta32:
case MachODelta64: {
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
index 28a6f9ce90d9..9ecc71dfbb54 100644
--- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
@@ -48,8 +48,12 @@ const char *getEdgeKindName(Edge::Kind R) {
return "TLVPage21";
case TLVPageOffset12:
return "TLVPageOffset12";
- case PointerToGOT:
- return "PointerToGOT";
+ case TLSDescPage21:
+ return "TLSDescPage21";
+ case TLSDescPageOffset12:
+ return "TLSDescPageOffset12";
+ case Delta32ToGOT:
+ return "Delta32ToGOT";
case PairedAddend:
return "PairedAddend";
case LDRLiteral19:
diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
index e476c549412a..e7ca636c83e9 100644
--- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
@@ -839,11 +839,13 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections(
Error ELFNixPlatform::ELFNixPlatformPlugin::fixTLVSectionsAndEdges(
jitlink::LinkGraph &G, JITDylib &JD) {
- // TODO implement TLV support
- for (auto *Sym : G.external_symbols())
+ for (auto *Sym : G.external_symbols()) {
if (Sym->getName() == "__tls_get_addr") {
Sym->setName("___orc_rt_elfnix_tls_get_addr");
+ } else if (Sym->getName() == "__tlsdesc_resolver") {
+ Sym->setName("___orc_rt_elfnix_tlsdesc_resolver");
}
+ }
auto *TLSInfoEntrySection = G.findSectionByName("$__TLSINFO");
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 6d67e6d87b56..1926ef1ecc72 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -666,8 +666,9 @@ Error LLJITBuilderState::prepareForConstruction() {
// JIT linker.
if (!CreateObjectLinkingLayer) {
auto &TT = JTMB->getTargetTriple();
- if (TT.isOSBinFormatMachO() &&
- (TT.getArch() == Triple::aarch64 || TT.getArch() == Triple::x86_64)) {
+ if (TT.getArch() == Triple::riscv64 ||
+ (TT.isOSBinFormatMachO() &&
+ (TT.getArch() == Triple::aarch64 || TT.getArch() == Triple::x86_64))) {
JTMB->setRelocationModel(Reloc::PIC_);
JTMB->setCodeModel(CodeModel::Small);
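The widened condition means a riscv64 target now gets the PIC/small-code-model defaults regardless of object format, so the JITLink path can be used there. A hedged usage sketch; it assumes LLVM headers and an initialized native target, and leaves error handling to cantFail:

#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/Support/TargetSelect.h"
using namespace llvm;
using namespace llvm::orc;

int main() {
  InitializeNativeTarget();
  InitializeNativeTargetAsmPrinter();
  // prepareForConstruction() applies the PIC/small-code-model defaults
  // shown above when the detected triple calls for them.
  auto J = cantFail(LLJITBuilder().create());
  (void)J;
}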
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
index 394a555e453b..356b81b4f1c5 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
@@ -9,6 +9,7 @@
#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -145,6 +146,55 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES,
return I;
}
+static Expected<MaterializationUnit::Interface>
+getCOFFObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::COFFObjectFile &Obj) {
+ MaterializationUnit::Interface I;
+
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
+ continue;
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+ *SymFlags |= JITSymbolFlags::Exported;
+ auto COFFSym = Obj.getCOFFSymbol(Sym);
+
+ // Weak externals are always functions.
+ if (COFFSym.isWeakExternal()) {
+ *SymFlags |= JITSymbolFlags::Callable;
+ }
+
+ I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags);
+ }
+
+ // FIXME: handle init symbols
+
+ return I;
+}
+
Expected<MaterializationUnit::Interface>
getGenericObjectFileSymbolInfo(ExecutionSession &ES,
const object::ObjectFile &Obj) {
@@ -196,6 +246,8 @@ getObjectFileInterface(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
return getMachOObjectFileSymbolInfo(ES, *MachOObj);
else if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj->get()))
return getELFObjectFileSymbolInfo(ES, *ELFObj);
+ else if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj->get()))
+ return getCOFFObjectFileSymbolInfo(ES, *COFFObj);
return getGenericObjectFileSymbolInfo(ES, **Obj);
}
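The COFF path reuses the filter chain of its MachO and ELF siblings: skip undefined symbols, skip non-globals, skip ST_File records, then intern what remains. A reduced sketch of that predicate with stubbed flag bits (the constant values are illustrative, not the real llvm::object ones):

#include <cstdint>

// Stand-ins for llvm::object::BasicSymbolRef flag bits (values illustrative).
constexpr uint32_t SF_Undefined = 1u << 0;
constexpr uint32_t SF_Global = 1u << 1;

enum SymbolType { ST_File, ST_Function, ST_Data };

// Mirrors the filter in getCOFFObjectFileSymbolInfo: keep only defined,
// global, non-file symbols for the materialization interface.
static bool shouldExposeSymbol(uint32_t Flags, SymbolType Type) {
  if (Flags & SF_Undefined)
    return false; // not defined in this object file
  if (!(Flags & SF_Global))
    return false; // local symbols stay private to the object
  return Type != ST_File; // source-file records carry no code or data
}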
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index ef764a3f0d7f..da8aaad08cad 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -665,7 +665,7 @@ void OrcMips32_Base::writeIndirectStubsBlock(
//
// i..
- assert(stubAndPointerRangesOk<OrcAArch64>(
+ assert(stubAndPointerRangesOk<OrcMips32_Base>(
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
"PointersBlock is out of range");
@@ -884,7 +884,7 @@ void OrcMips64::writeIndirectStubsBlock(
//
// ...
- assert(stubAndPointerRangesOk<OrcAArch64>(
+ assert(stubAndPointerRangesOk<OrcMips64>(
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
"PointersBlock is out of range");
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp
index fdad90cbcfb7..2cc2bddeb21a 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp
@@ -12,7 +12,6 @@
#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include <type_traits>
@@ -70,7 +69,10 @@ public:
}
};
-static ManagedStatic<OrcErrorCategory> OrcErrCat;
+OrcErrorCategory &getOrcErrCat() {
+ static OrcErrorCategory OrcErrCat;
+ return OrcErrCat;
+}
} // namespace
namespace llvm {
@@ -81,7 +83,7 @@ char JITSymbolNotFound::ID = 0;
std::error_code orcError(OrcErrorCode ErrCode) {
typedef std::underlying_type<OrcErrorCode>::type UT;
- return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat);
+ return std::error_code(static_cast<UT>(ErrCode), getOrcErrCat());
}
DuplicateDefinition::DuplicateDefinition(std::string SymbolName)
@@ -105,7 +107,7 @@ JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName)
std::error_code JITSymbolNotFound::convertToErrorCode() const {
typedef std::underlying_type<OrcErrorCode>::type UT;
return std::error_code(static_cast<UT>(OrcErrorCode::JITSymbolNotFound),
- *OrcErrCat);
+ getOrcErrCat());
}
void JITSymbolNotFound::log(raw_ostream &OS) const {
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
index ffa2969536e7..8296b03398a0 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
@@ -11,7 +11,6 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/ManagedStatic.h"
#include <cstdint>
#include <mutex>
@@ -67,9 +66,6 @@ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() {
using namespace llvm;
using namespace llvm::orc;
-// Serialize rendezvous with the debugger as well as access to shared data.
-ManagedStatic<std::mutex> JITDebugLock;
-
// Register debug object, return error message or null for success.
static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) {
LLVM_DEBUG({
@@ -85,7 +81,9 @@ static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) {
E->symfile_size = Size;
E->prev_entry = nullptr;
- std::lock_guard<std::mutex> Lock(*JITDebugLock);
+ // Serialize rendezvous with the debugger as well as access to shared data.
+ static std::mutex JITDebugLock;
+ std::lock_guard<std::mutex> Lock(JITDebugLock);
// Insert this entry at the head of the list.
jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
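The mutex change is the same singleton rewrite in miniature: a block-scoped static is constructed on first use with the C++11 thread-safety guarantee, and needs no llvm_shutdown to tear it down. Minimal sketch:

#include <mutex>

static int SharedCounter = 0;

void touchShared() {
  // Initialized once, thread-safely, on the first call ("magic static").
  static std::mutex Lock;
  std::lock_guard<std::mutex> Guard(Lock);
  ++SharedCounter;
}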
diff --git a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index 4a236e183c8b..bb41bac32534 100644
--- a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Path.h"
@@ -488,15 +487,14 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
}
}
-// There should be only a single event listener per process, otherwise perf gets
-// confused.
-llvm::ManagedStatic<PerfJITEventListener> PerfListener;
-
} // end anonymous namespace
namespace llvm {
JITEventListener *JITEventListener::createPerfJITEventListener() {
- return &*PerfListener;
+ // There should be only a single event listener per process, otherwise perf
+ // gets confused.
+ static PerfJITEventListener PerfListener;
+ return &PerfListener;
}
} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 2e0cba849165..54ab00732330 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -19,7 +19,6 @@
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include <mutex>
@@ -51,8 +50,6 @@ public:
}
};
-static ManagedStatic<RuntimeDyldErrorCategory> RTDyldErrorCategory;
-
}
char RuntimeDyldError::ID = 0;
@@ -62,7 +59,8 @@ void RuntimeDyldError::log(raw_ostream &OS) const {
}
std::error_code RuntimeDyldError::convertToErrorCode() const {
- return std::error_code(GenericRTDyldError, *RTDyldErrorCategory);
+ static RuntimeDyldErrorCategory RTDyldErrorCategory;
+ return std::error_code(GenericRTDyldError, RTDyldErrorCategory);
}
// Empty out-of-line virtual destructor as the key function.
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index da1102fc9f07..c702584b7a33 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -479,7 +479,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
*TargetPtr &= 0xfff8001fU;
// Immediate:15:2 goes in bits 18:5 of TBZ, TBNZ
- or32le(TargetPtr, (BranchImm & 0x0FFFFFFC) << 3);
+ or32le(TargetPtr, (BranchImm & 0x0000FFFC) << 3);
break;
}
case ELF::R_AARCH64_CALL26: // fallthrough
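The mask fix is pure bit layout: TBZ/TBNZ encode a 14-bit immediate in instruction bits 18:5, taken from byte-offset bits 15:2, so only 0x0000FFFC of the offset may survive the mask; the previous 0x0FFFFFFC let offset bits 16 and up corrupt the bit-number and opcode fields. A worked sketch of the corrected encoding:

#include <cassert>
#include <cstdint>

// Place byte-offset bits 15:2 of a TBZ/TBNZ displacement into instruction
// bits 18:5 (the imm14 field), as the corrected code does.
static uint32_t encodeTB14(int64_t BranchImm) {
  return (static_cast<uint32_t>(BranchImm) & 0x0000FFFC) << 3;
}

int main() {
  // Offset 4 -> offset bit 2 set -> lands in instruction bit 5.
  assert(encodeTB14(4) == (1u << 5));
  // The largest in-range offset stays inside bits 18:5.
  assert((encodeTB14(0xFFFC) >> 19) == 0);
}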
diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
index 6e8856f481af..0f846f7bfee5 100644
--- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
@@ -214,7 +214,7 @@ static int isVariantApplicableInContextHelper(
Optional<bool> Result = HandleTrait(Property, IsActiveTrait);
if (Result)
- return Result.getValue();
+ return Result.value();
}
if (!DeviceSetOnly) {
@@ -235,7 +235,7 @@ static int isVariantApplicableInContextHelper(
Optional<bool> Result = HandleTrait(Property, FoundInOrder);
if (Result)
- return Result.getValue();
+ return Result.value();
if (!FoundInOrder) {
LLVM_DEBUG(dbgs() << "[" << DEBUG_TYPE << "] Construct property "
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9b08a24e14d4..574d9174bebf 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -791,6 +791,38 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name,
Entry->setAlignment(Align(1));
}
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
+ const LocationDescription &Loc, Value *&Return, Value *Ident,
+ Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr,
+ ArrayRef<Value *> KernelArgs, ArrayRef<Value *> NoWaitArgs) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ auto *KernelArgsPtr =
+ Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args");
+ for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) {
+ llvm::Value *Arg =
+ Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I);
+ Builder.CreateAlignedStore(
+ KernelArgs[I], Arg,
+ M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType()));
+ }
+
+ bool HasNoWait = !NoWaitArgs.empty();
+ SmallVector<Value *> OffloadingArgs{Ident, DeviceID, NumTeams,
+ NumThreads, HostPtr, KernelArgsPtr};
+ if (HasNoWait)
+ OffloadingArgs.append(NoWaitArgs.begin(), NoWaitArgs.end());
+
+ Return = Builder.CreateCall(
+ HasNoWait
+ ? getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel_nowait)
+ : getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
+ OffloadingArgs);
+
+ return Builder.saveIP();
+}
+
void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
omp::Directive CanceledDirective,
FinalizeCallbackTy ExitCB) {
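emitTargetKernel packs its runtime arguments field by field: one CreateStructGEP per member, then a store aligned to the member type's preferred alignment. The same idiom, reduced to a free function over an arbitrary struct type (a sketch; the names are placeholders, not the OpenMP kernel-args layout):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Store Values[i] into field i of a fresh alloca of StructTy, mirroring the
// CreateStructGEP/CreateAlignedStore loop in emitTargetKernel.
void packIntoStruct(IRBuilder<> &Builder, Module &M, StructType *StructTy,
                    ArrayRef<Value *> Values) {
  Value *Slot = Builder.CreateAlloca(StructTy, nullptr, "args");
  for (unsigned I = 0, E = Values.size(); I != E; ++I) {
    Value *FieldPtr = Builder.CreateStructGEP(StructTy, Slot, I);
    Builder.CreateAlignedStore(
        Values[I], FieldPtr,
        M.getDataLayout().getPrefTypeAlign(Values[I]->getType()));
  }
}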
@@ -1260,6 +1292,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
if (!updateToLocation(Loc))
return InsertPointTy();
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// The current basic block is split into four basic blocks. After outlining,
// they will be mapped as follows:
// ```
@@ -1285,7 +1320,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
OI.EntryBB = TaskAllocaBB;
OI.OuterAllocaBB = AllocaIP.getBlock();
OI.ExitBB = TaskExitBB;
- OI.PostOutlineCB = [this, &Loc, Tied, Final](Function &OutlinedFn) {
+ OI.PostOutlineCB = [this, Ident, Tied, Final](Function &OutlinedFn) {
// The input IR here looks like the following-
// ```
// func @current_fn() {
@@ -1324,9 +1359,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
// Arguments - `loc_ref` (Ident) and `gtid` (ThreadID)
// call.
- uint32_t SrcLocStrSize;
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
- Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
Value *ThreadID = getOrCreateThreadID(Ident);
// Argument - `flags`
@@ -2834,7 +2866,8 @@ void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
});
}
-void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *CanonicalLoop) {
+void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
+ ConstantInt *Simdlen) {
LLVMContext &Ctx = Builder.getContext();
Function *F = CanonicalLoop->getFunction();
@@ -2879,6 +2912,11 @@ void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *CanonicalLoop) {
AccessGroup}),
MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
BoolConst})});
+ if (Simdlen != nullptr)
+ addLoopMetadata(
+ CanonicalLoop,
+ MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.width"),
+ ConstantAsMetadata::get(Simdlen)}));
}
/// Create the TargetMachine object to query the backend for optimization
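When a simdlen clause is present the loop now carries an llvm.loop.vectorize.width hint next to vectorize.enable. A small standalone sketch of assembling such a metadata node (the width of 8 is an arbitrary example):

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  // simdlen(8) becomes a vectorize.width hint on the loop's !llvm.loop node.
  ConstantInt *Simdlen = ConstantInt::get(Type::getInt32Ty(Ctx), 8);
  MDNode *Width =
      MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.width"),
                        ConstantAsMetadata::get(Simdlen)});
  (void)Width;
}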
@@ -3962,6 +4000,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
llvm_unreachable("Unsupported atomic update operation");
}
llvm_unreachable("Unsupported atomic update operation");
@@ -4126,20 +4166,37 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
assert(X.Var->getType()->isPointerTy() &&
"OMP atomic expects a pointer to target memory");
- assert((X.ElemTy->isIntegerTy() || X.ElemTy->isPointerTy()) &&
- "OMP atomic compare expected a integer scalar type");
// compare capture
if (V.Var) {
assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
assert(V.ElemTy == X.ElemTy && "x and v must be of same type");
}
+ bool IsInteger = E->getType()->isIntegerTy();
+
if (Op == OMPAtomicCompareOp::EQ) {
AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
- AtomicCmpXchgInst *Result =
- Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
+ AtomicCmpXchgInst *Result = nullptr;
+ if (!IsInteger) {
+ unsigned Addrspace =
+ cast<PointerType>(X.Var->getType())->getAddressSpace();
+ IntegerType *IntCastTy =
+ IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
+ Value *XBCast =
+ Builder.CreateBitCast(X.Var, IntCastTy->getPointerTo(Addrspace));
+ Value *EBCast = Builder.CreateBitCast(E, IntCastTy);
+ Value *DBCast = Builder.CreateBitCast(D, IntCastTy);
+ Result = Builder.CreateAtomicCmpXchg(XBCast, EBCast, DBCast, MaybeAlign(),
+ AO, Failure);
+ } else {
+ Result =
+ Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
+ }
+
if (V.Var) {
Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0);
+ if (!IsInteger)
+ OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
assert(OldValue->getType() == V.ElemTy &&
"OldValue and V must be of same type");
if (IsPostfixUpdate) {
@@ -4213,19 +4270,29 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
// x = x <= expr ? x : expr;
AtomicRMWInst::BinOp NewOp;
if (IsXBinopExpr) {
- if (X.IsSigned)
- NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min
- : AtomicRMWInst::Max;
- else
- NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin
- : AtomicRMWInst::UMax;
+ if (IsInteger) {
+ if (X.IsSigned)
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min
+ : AtomicRMWInst::Max;
+ else
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin
+ : AtomicRMWInst::UMax;
+ } else {
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMin
+ : AtomicRMWInst::FMax;
+ }
} else {
- if (X.IsSigned)
- NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max
- : AtomicRMWInst::Min;
- else
- NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMax
- : AtomicRMWInst::UMin;
+ if (IsInteger) {
+ if (X.IsSigned)
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max
+ : AtomicRMWInst::Min;
+ else
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMax
+ : AtomicRMWInst::UMin;
+ } else {
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMax
+ : AtomicRMWInst::FMin;
+ }
}
AtomicRMWInst *OldValue =
@@ -4243,12 +4310,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
case AtomicRMWInst::UMax:
Pred = CmpInst::ICMP_UGT;
break;
+ case AtomicRMWInst::FMax:
+ Pred = CmpInst::FCMP_OGT;
+ break;
case AtomicRMWInst::Min:
Pred = CmpInst::ICMP_SLT;
break;
case AtomicRMWInst::UMin:
Pred = CmpInst::ICMP_ULT;
break;
+ case AtomicRMWInst::FMin:
+ Pred = CmpInst::FCMP_OLT;
+ break;
default:
llvm_unreachable("unexpected comparison op");
}
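For EQ compares on floating-point x there is no float cmpxchg, so the builder reinterprets the pointer and both operands as same-width integers first, then bitcasts the old value back for the capture. A reduced sketch of that trick (it assumes typed pointers, matching this commit's getPointerTo/CreateBitCast usage):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emit cmpxchg on a float location by bitcasting to iN, as
// createAtomicCompare does for non-integer element types.
AtomicCmpXchgInst *emitFloatCmpXchg(IRBuilder<> &Builder, Value *Ptr,
                                    Value *Expected, Value *Desired,
                                    Type *ElemTy, AtomicOrdering AO) {
  unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
  IntegerType *IntTy =
      IntegerType::get(Builder.getContext(), ElemTy->getScalarSizeInBits());
  Value *IPtr = Builder.CreateBitCast(Ptr, IntTy->getPointerTo(AS));
  Value *IExp = Builder.CreateBitCast(Expected, IntTy);
  Value *IDes = Builder.CreateBitCast(Desired, IntTy);
  return Builder.CreateAtomicCmpXchg(
      IPtr, IExp, IDes, MaybeAlign(), AO,
      AtomicCmpXchgInst::getStrongestFailureOrdering(AO));
}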
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 596348ddb462..a29040b8c2aa 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1590,10 +1590,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << ", ";
}
- if (CE->hasIndices())
- for (unsigned I : CE->getIndices())
- Out << ", " << I;
-
if (CE->isCast()) {
Out << " to ";
WriterCtx.TypePrinter->print(CE->getType(), Out);
@@ -3542,8 +3538,8 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
Out << ", no_sanitize_address";
if (MD.NoHWAddress)
Out << ", no_sanitize_hwaddress";
- if (MD.NoMemtag)
- Out << ", no_sanitize_memtag";
+ if (MD.Memtag)
+ Out << ", sanitize_memtag";
if (MD.IsDynInit)
Out << ", sanitize_address_dyninit";
}
@@ -4299,9 +4295,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
bool PrintAllTypes = false;
Type *TheType = Operand->getType();
- // Select, Store and ShuffleVector always print all types.
- if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
- || isa<ReturnInst>(I)) {
+ // Select, Store, ShuffleVector, Return and CmpXchg always print all types.
+ if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I) ||
+ isa<ReturnInst>(I) || isa<AtomicCmpXchgInst>(I)) {
PrintAllTypes = true;
} else {
for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) {
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 41b4f2919221..98adff107cec 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -1218,9 +1218,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue())
return PoisonValue::get(VTy);
if (Constant *C1Splat = C1->getSplatValue()) {
- return ConstantVector::getSplat(
- VTy->getElementCount(),
- ConstantExpr::get(Opcode, C1Splat, C2Splat));
+ Constant *Res =
+ ConstantExpr::isDesirableBinOp(Opcode)
+ ? ConstantExpr::get(Opcode, C1Splat, C2Splat)
+ : ConstantFoldBinaryInstruction(Opcode, C1Splat, C2Splat);
+ if (!Res)
+ return nullptr;
+ return ConstantVector::getSplat(VTy->getElementCount(), Res);
}
}
@@ -1237,7 +1241,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
return PoisonValue::get(VTy);
- Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
+ Constant *Res = ConstantExpr::isDesirableBinOp(Opcode)
+ ? ConstantExpr::get(Opcode, LHS, RHS)
+ : ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
+ if (!Res)
+ return nullptr;
+ Result.push_back(Res);
}
return ConstantVector::get(Result);
@@ -2218,9 +2227,15 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
: cast<FixedVectorType>(CurrIdx->getType())->getNumElements(),
Factor);
- NewIdxs[i] = ConstantExpr::getSRem(CurrIdx, Factor);
+ NewIdxs[i] =
+ ConstantFoldBinaryInstruction(Instruction::SRem, CurrIdx, Factor);
+
- Constant *Div = ConstantExpr::getSDiv(CurrIdx, Factor);
+ Constant *Div =
+ ConstantFoldBinaryInstruction(Instruction::SDiv, CurrIdx, Factor);
+ // We're working on either ConstantInt or vectors of ConstantInt,
+ // so these should always fold.
+ assert(NewIdxs[i] != nullptr && Div != nullptr && "Should have folded");
unsigned CommonExtendedWidth =
std::max(PrevIdx->getType()->getScalarSizeInBits(),
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 0bf5e09d6647..f9800cc0c07c 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -547,8 +547,6 @@ void llvm::deleteConstant(Constant *C) {
delete static_cast<InsertElementConstantExpr *>(C);
else if (isa<ShuffleVectorConstantExpr>(C))
delete static_cast<ShuffleVectorConstantExpr *>(C);
- else if (isa<InsertValueConstantExpr>(C))
- delete static_cast<InsertValueConstantExpr *>(C);
else if (isa<GetElementPtrConstantExpr>(C))
delete static_cast<GetElementPtrConstantExpr *>(C);
else if (isa<CompareConstantExpr>(C))
@@ -561,51 +559,6 @@ void llvm::deleteConstant(Constant *C) {
}
}
-static bool canTrapImpl(const Constant *C,
- SmallPtrSetImpl<const Constant *> &NonTrappingOps) {
- assert(C->getType()->isFirstClassType() &&
- "Cannot evaluate non-first-class types!");
- // ConstantExpr or ConstantAggregate trap if any operands can trap.
- if (isa<ConstantExpr>(C) || isa<ConstantAggregate>(C)) {
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
- const Constant *Op = cast<Constant>(C->getOperand(i));
- if (isa<ConstantExpr>(Op) || isa<ConstantAggregate>(Op)) {
- if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps))
- return true;
- }
- }
- }
-
- // The only leafs that can trap are constant expressions.
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
- if (!CE)
- return false;
-
- // Otherwise, only specific operations can trap.
- switch (CE->getOpcode()) {
- default:
- return false;
- case Instruction::SDiv:
- case Instruction::SRem:
- // Signed div/rem can trap for SignedMin / -1.
- if (!CE->getOperand(0)->isNotMinSignedValue() &&
- (!isa<ConstantInt>(CE->getOperand(1)) ||
- CE->getOperand(1)->isAllOnesValue()))
- return true;
- LLVM_FALLTHROUGH;
- case Instruction::UDiv:
- case Instruction::URem:
- // Div and rem can trap if the RHS is not known to be non-zero.
- return !isa<ConstantInt>(CE->getOperand(1)) ||
- CE->getOperand(1)->isNullValue();
- }
-}
-
-bool Constant::canTrap() const {
- SmallPtrSet<const Constant *, 4> NonTrappingOps;
- return canTrapImpl(this, NonTrappingOps);
-}
-
/// Check if C contains a GlobalValue for which Predicate is true.
static bool
ConstHasGlobalValuePredicate(const Constant *C,
@@ -1488,14 +1441,6 @@ bool ConstantExpr::isCompare() const {
return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp;
}
-bool ConstantExpr::hasIndices() const {
- return getOpcode() == Instruction::InsertValue;
-}
-
-ArrayRef<unsigned> ConstantExpr::getIndices() const {
- return cast<InsertValueConstantExpr>(this)->Indices;
-}
-
unsigned ConstantExpr::getPredicate() const {
return cast<CompareConstantExpr>(this)->predicate;
}
@@ -1539,9 +1484,6 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
OnlyIfReducedTy);
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1], OnlyIfReducedTy);
- case Instruction::InsertValue:
- return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices(),
- OnlyIfReducedTy);
case Instruction::FNeg:
return ConstantExpr::getFNeg(Ops[0]);
case Instruction::ShuffleVector:
@@ -2324,6 +2266,8 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
// Check the operands for consistency first.
assert(Instruction::isBinaryOp(Opcode) &&
"Invalid opcode in binary constant expression");
+ assert(isSupportedBinOp(Opcode) &&
+ "Binop not supported as constant expression");
assert(C1->getType() == C2->getType() &&
"Operand types in binary constant expression should match");
@@ -2378,6 +2322,60 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
}
+bool ConstantExpr::isDesirableBinOp(unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ default:
+ llvm_unreachable("Argument must be binop opcode");
+ }
+}
+
+bool ConstantExpr::isSupportedBinOp(unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ default:
+ llvm_unreachable("Argument must be binop opcode");
+ }
+}
+
Constant *ConstantExpr::getSizeOf(Type* Ty) {
// sizeof is implemented as: (i64) gep (Ty*)null, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
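Callers that used to build binop constant expressions unconditionally now split on isDesirableBinOp and fall back to folding, which can fail. A condensed sketch of the dispatch seen in the ConstantFold.cpp hunks above; note that ConstantFoldBinaryInstruction is declared in the ConstantFold.h helper header, whose location has moved across LLVM versions, so treat this as illustrative:

#include "llvm/IR/ConstantFold.h" // declares ConstantFoldBinaryInstruction
#include "llvm/IR/Constants.h"
using namespace llvm;

// Keep only "desirable" opcodes (add, and, shl, ...) as constant
// expressions; divisions and FP ops must fold or yield nullptr.
static Constant *foldOrBuildBinOp(unsigned Opcode, Constant *L, Constant *R) {
  if (ConstantExpr::isDesirableBinOp(Opcode))
    return ConstantExpr::get(Opcode, L, R);
  return ConstantFoldBinaryInstruction(Opcode, L, R); // may be nullptr
}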
@@ -2517,7 +2515,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
if (InRangeIndex && *InRangeIndex < 63)
SubClassOptionalData |= (*InRangeIndex + 1) << 1;
const ConstantExprKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
- SubClassOptionalData, None, None, Ty);
+ SubClassOptionalData, None, Ty);
LLVMContextImpl *pImpl = C->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
@@ -2638,36 +2636,12 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
// Look up the constant in the table first to ensure uniqueness
Constant *ArgVec[] = {V1, V2};
- ConstantExprKeyType Key(Instruction::ShuffleVector, ArgVec, 0, 0, None, Mask);
+ ConstantExprKeyType Key(Instruction::ShuffleVector, ArgVec, 0, 0, Mask);
LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
}
-Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> Idxs,
- Type *OnlyIfReducedTy) {
- assert(Agg->getType()->isFirstClassType() &&
- "Non-first-class type for constant insertvalue expression");
-
- assert(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs) == Val->getType() &&
- "insertvalue indices invalid!");
- Type *ReqTy = Val->getType();
-
- if (Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs))
- return FC;
-
- if (OnlyIfReducedTy == ReqTy)
- return nullptr;
-
- Constant *ArgVec[] = { Agg, Val };
- const ConstantExprKeyType Key(Instruction::InsertValue, ArgVec, 0, 0, Idxs);
-
- LLVMContextImpl *pImpl = Agg->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
@@ -2694,10 +2668,6 @@ Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
return get(Instruction::Add, C1, C2, Flags);
}
-Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
- return get(Instruction::FAdd, C1, C2);
-}
-
Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
bool HasNUW, bool HasNSW) {
unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
@@ -2705,10 +2675,6 @@ Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
return get(Instruction::Sub, C1, C2, Flags);
}
-Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
- return get(Instruction::FSub, C1, C2);
-}
-
Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
bool HasNUW, bool HasNSW) {
unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
@@ -2716,36 +2682,6 @@ Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
return get(Instruction::Mul, C1, C2, Flags);
}
-Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
- return get(Instruction::FMul, C1, C2);
-}
-
-Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::UDiv, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
-Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::SDiv, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
-Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
- return get(Instruction::FDiv, C1, C2);
-}
-
-Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
- return get(Instruction::URem, C1, C2);
-}
-
-Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
- return get(Instruction::SRem, C1, C2);
-}
-
-Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
- return get(Instruction::FRem, C1, C2);
-}
-
Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
return get(Instruction::And, C1, C2);
}
@@ -3517,9 +3453,6 @@ Instruction *ConstantExpr::getAsInstruction(Instruction *InsertBefore) const {
return InsertElementInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore);
case Instruction::ExtractElement:
return ExtractElementInst::Create(Ops[0], Ops[1], "", InsertBefore);
- case Instruction::InsertValue:
- return InsertValueInst::Create(Ops[0], Ops[1], getIndices(), "",
- InsertBefore);
case Instruction::ShuffleVector:
return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask(), "",
InsertBefore);
diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h
index 21ef1c0d9f64..1d74e2d49f35 100644
--- a/llvm/lib/IR/ConstantsContext.h
+++ b/llvm/lib/IR/ConstantsContext.h
@@ -209,37 +209,6 @@ public:
}
};
-/// InsertValueConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// insertvalue constant exprs.
-class InsertValueConstantExpr final : public ConstantExpr {
-public:
- InsertValueConstantExpr(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> IdxList, Type *DestTy)
- : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
- Indices(IdxList.begin(), IdxList.end()) {
- Op<0>() = Agg;
- Op<1>() = Val;
- }
-
- // allocate space for exactly one operand
- void *operator new(size_t S) { return User::operator new(S, 2); }
- void operator delete(void *Ptr) { User::operator delete(Ptr); }
-
- /// Indices - These identify the position for the insertion.
- const SmallVector<unsigned, 4> Indices;
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- static bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::InsertValue;
- }
- static bool classof(const Value *V) {
- return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
- }
-};
-
/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
/// used behind the scenes to implement getelementptr constant exprs.
class GetElementPtrConstantExpr final : public ConstantExpr {
@@ -333,11 +302,6 @@ struct OperandTraits<ShuffleVectorConstantExpr>
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
template <>
-struct OperandTraits<InsertValueConstantExpr>
- : public FixedNumOperandTraits<InsertValueConstantExpr, 2> {};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
-
-template <>
struct OperandTraits<GetElementPtrConstantExpr>
: public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {};
@@ -472,7 +436,6 @@ private:
uint8_t SubclassOptionalData;
uint16_t SubclassData;
ArrayRef<Constant *> Ops;
- ArrayRef<unsigned> Indexes;
ArrayRef<int> ShuffleMask;
Type *ExplicitTy;
@@ -482,12 +445,6 @@ private:
return None;
}
- static ArrayRef<unsigned> getIndicesIfValid(const ConstantExpr *CE) {
- if (CE->hasIndices())
- return CE->getIndices();
- return None;
- }
-
static Type *getSourceElementTypeIfValid(const ConstantExpr *CE) {
if (auto *GEPCE = dyn_cast<GetElementPtrConstantExpr>(CE))
return GEPCE->getSourceElementType();
@@ -498,18 +455,17 @@ public:
ConstantExprKeyType(unsigned Opcode, ArrayRef<Constant *> Ops,
unsigned short SubclassData = 0,
unsigned short SubclassOptionalData = 0,
- ArrayRef<unsigned> Indexes = None,
ArrayRef<int> ShuffleMask = None,
Type *ExplicitTy = nullptr)
: Opcode(Opcode), SubclassOptionalData(SubclassOptionalData),
- SubclassData(SubclassData), Ops(Ops), Indexes(Indexes),
- ShuffleMask(ShuffleMask), ExplicitTy(ExplicitTy) {}
+ SubclassData(SubclassData), Ops(Ops), ShuffleMask(ShuffleMask),
+ ExplicitTy(ExplicitTy) {}
ConstantExprKeyType(ArrayRef<Constant *> Operands, const ConstantExpr *CE)
: Opcode(CE->getOpcode()),
SubclassOptionalData(CE->getRawSubclassOptionalData()),
SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands),
- Indexes(getIndicesIfValid(CE)), ShuffleMask(getShuffleMaskIfValid(CE)),
+ ShuffleMask(getShuffleMaskIfValid(CE)),
ExplicitTy(getSourceElementTypeIfValid(CE)) {}
ConstantExprKeyType(const ConstantExpr *CE,
@@ -517,7 +473,7 @@ public:
: Opcode(CE->getOpcode()),
SubclassOptionalData(CE->getRawSubclassOptionalData()),
SubclassData(CE->isCompare() ? CE->getPredicate() : 0),
- Indexes(getIndicesIfValid(CE)), ShuffleMask(getShuffleMaskIfValid(CE)),
+ ShuffleMask(getShuffleMaskIfValid(CE)),
ExplicitTy(getSourceElementTypeIfValid(CE)) {
assert(Storage.empty() && "Expected empty storage");
for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I)
@@ -528,8 +484,7 @@ public:
bool operator==(const ConstantExprKeyType &X) const {
return Opcode == X.Opcode && SubclassData == X.SubclassData &&
SubclassOptionalData == X.SubclassOptionalData && Ops == X.Ops &&
- Indexes == X.Indexes && ShuffleMask == X.ShuffleMask &&
- ExplicitTy == X.ExplicitTy;
+ ShuffleMask == X.ShuffleMask && ExplicitTy == X.ExplicitTy;
}
bool operator==(const ConstantExpr *CE) const {
@@ -544,8 +499,6 @@ public:
for (unsigned I = 0, E = Ops.size(); I != E; ++I)
if (Ops[I] != CE->getOperand(I))
return false;
- if (Indexes != getIndicesIfValid(CE))
- return false;
if (ShuffleMask != getShuffleMaskIfValid(CE))
return false;
if (ExplicitTy != getSourceElementTypeIfValid(CE))
@@ -557,7 +510,6 @@ public:
return hash_combine(
Opcode, SubclassOptionalData, SubclassData,
hash_combine_range(Ops.begin(), Ops.end()),
- hash_combine_range(Indexes.begin(), Indexes.end()),
hash_combine_range(ShuffleMask.begin(), ShuffleMask.end()), ExplicitTy);
}
@@ -583,8 +535,6 @@ public:
return new InsertElementConstantExpr(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
return new ShuffleVectorConstantExpr(Ops[0], Ops[1], ShuffleMask);
- case Instruction::InsertValue:
- return new InsertValueConstantExpr(Ops[0], Ops[1], Indexes, Ty);
case Instruction::GetElementPtr:
return GetElementPtrConstantExpr::Create(ExplicitTy, Ops[0], Ops.slice(1),
Ty, SubclassOptionalData);
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 4b9189ca5baa..08b7b0e1f956 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -74,13 +74,16 @@ void LLVMDisposeMessage(char *Message) {
/*===-- Operations on contexts --------------------------------------------===*/
-static ManagedStatic<LLVMContext> GlobalContext;
+static LLVMContext &getGlobalContext() {
+ static LLVMContext GlobalContext;
+ return GlobalContext;
+}
LLVMContextRef LLVMContextCreate() {
return wrap(new LLVMContext());
}
-LLVMContextRef LLVMGetGlobalContext() { return wrap(&*GlobalContext); }
+LLVMContextRef LLVMGetGlobalContext() { return wrap(&getGlobalContext()); }
void LLVMContextSetDiagnosticHandler(LLVMContextRef C,
LLVMDiagnosticHandler Handler,
@@ -251,7 +254,7 @@ LLVMDiagnosticSeverity LLVMGetDiagInfoSeverity(LLVMDiagnosticInfoRef DI) {
/*===-- Operations on modules ---------------------------------------------===*/
LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
- return wrap(new Module(ModuleID, *GlobalContext));
+ return wrap(new Module(ModuleID, getGlobalContext()));
}
LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
@@ -1571,11 +1574,6 @@ LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
unwrap<Constant>(RHSConstant)));
}
-LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
@@ -1593,11 +1591,6 @@ LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
unwrap<Constant>(RHSConstant)));
}
-LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
@@ -1615,53 +1608,6 @@ LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
unwrap<Constant>(RHSConstant)));
}
-LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstExactUDiv(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getExactUDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
@@ -1875,14 +1821,6 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
IntMask));
}
-LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
- LLVMValueRef ElementValueConstant,
- unsigned *IdxList, unsigned NumIdx) {
- return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
- unwrap<Constant>(ElementValueConstant),
- makeArrayRef(IdxList, NumIdx)));
-}
-
LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
const char *Constraints,
LLVMBool HasSideEffects,
@@ -2843,6 +2781,10 @@ void LLVMInstructionEraseFromParent(LLVMValueRef Inst) {
unwrap<Instruction>(Inst)->eraseFromParent();
}
+void LLVMDeleteInstruction(LLVMValueRef Inst) {
+ unwrap<Instruction>(Inst)->deleteValue();
+}
+
LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) {
if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst)))
return (LLVMIntPredicate)I->getPredicate();
@@ -3079,8 +3021,6 @@ unsigned LLVMGetNumIndices(LLVMValueRef Inst) {
return EV->getNumIndices();
if (auto *IV = dyn_cast<InsertValueInst>(I))
return IV->getNumIndices();
- if (auto *CE = dyn_cast<ConstantExpr>(I))
- return CE->getIndices().size();
llvm_unreachable(
"LLVMGetNumIndices applies only to extractvalue and insertvalue!");
}
@@ -3091,8 +3031,6 @@ const unsigned *LLVMGetIndices(LLVMValueRef Inst) {
return EV->getIndices().data();
if (auto *IV = dyn_cast<InsertValueInst>(I))
return IV->getIndices().data();
- if (auto *CE = dyn_cast<ConstantExpr>(I))
- return CE->getIndices().data();
llvm_unreachable(
"LLVMGetIndices applies only to extractvalue and insertvalue!");
}
@@ -3664,6 +3602,8 @@ static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) {
case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin;
case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd;
case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub;
+ case LLVMAtomicRMWBinOpFMax: return AtomicRMWInst::FMax;
+ case LLVMAtomicRMWBinOpFMin: return AtomicRMWInst::FMin;
}
llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!");
@@ -3684,6 +3624,8 @@ static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) {
case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin;
case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd;
case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub;
+ case AtomicRMWInst::FMax: return LLVMAtomicRMWBinOpFMax;
+ case AtomicRMWInst::FMin: return LLVMAtomicRMWBinOpFMin;
default: break;
}
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index 203ad6dae1ff..c75b1aa7c1d6 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
#include <algorithm>
#include <cassert>
#include <cctype>
@@ -33,9 +34,10 @@ InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString,
AsmString(asmString), Constraints(constraints), FTy(FTy),
HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
Dialect(asmDialect), CanThrow(canThrow) {
+#ifndef NDEBUG
// Do various checks on the constraint string and type.
- assert(Verify(getFunctionType(), constraints) &&
- "Function type not legal for constraints!");
+ cantFail(verify(getFunctionType(), constraints));
+#endif
}
InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString,
@@ -248,15 +250,19 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
return Result;
}
-/// Verify - Verify that the specified constraint string is reasonable for the
-/// specified function type, and otherwise validate the constraint string.
-bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
- if (Ty->isVarArg()) return false;
+static Error makeStringError(const char *Msg) {
+ return createStringError(errc::invalid_argument, Msg);
+}
+
+Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) {
+ if (Ty->isVarArg())
+ return makeStringError("inline asm cannot be variadic");
ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
// Error parsing constraints.
- if (Constraints.empty() && !ConstStr.empty()) return false;
+ if (Constraints.empty() && !ConstStr.empty())
+ return makeStringError("failed to parse constraints");
unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
unsigned NumIndirect = 0;
@@ -265,7 +271,9 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
switch (Constraint.Type) {
case InlineAsm::isOutput:
if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
- return false; // outputs before inputs and clobbers.
+ return makeStringError("output constraint occurs after input "
+ "or clobber constraint");
+
if (!Constraint.isIndirect) {
++NumOutputs;
break;
@@ -273,7 +281,9 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
++NumIndirect;
LLVM_FALLTHROUGH; // We fall through for Indirect Outputs.
case InlineAsm::isInput:
- if (NumClobbers) return false; // inputs before clobbers.
+ if (NumClobbers)
+ return makeStringError("input constraint occurs after clobber "
+ "constraint");
++NumInputs;
break;
case InlineAsm::isClobber:
@@ -284,18 +294,23 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
switch (NumOutputs) {
case 0:
- if (!Ty->getReturnType()->isVoidTy()) return false;
+ if (!Ty->getReturnType()->isVoidTy())
+ return makeStringError("inline asm without outputs must return void");
break;
case 1:
- if (Ty->getReturnType()->isStructTy()) return false;
+ if (Ty->getReturnType()->isStructTy())
+ return makeStringError("inline asm with one output cannot return struct");
break;
default:
StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
if (!STy || STy->getNumElements() != NumOutputs)
- return false;
+ return makeStringError("number of output constraints does not match "
+ "number of return struct elements");
break;
}
- if (Ty->getNumParams() != NumInputs) return false;
- return true;
+ if (Ty->getNumParams() != NumInputs)
+ return makeStringError("number of input constraints does not match number "
+ "of parameters");
+ return Error::success();
}
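Because verify now returns llvm::Error instead of bool, callers receive a diagnostic string and must consume the error on every path; the constructor wraps the call in cantFail since the string was validated earlier. A hedged usage sketch:

#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Validate an inline-asm constraint string before constructing an InlineAsm.
bool constraintsOk(FunctionType *FTy, StringRef Constraints) {
  if (Error E = InlineAsm::verify(FTy, Constraints)) {
    // toString() consumes E and yields the message from makeStringError().
    errs() << "invalid inline asm: " << toString(std::move(E)) << "\n";
    return false;
  }
  return true;
}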
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 6a91edb75dd2..b333f40f3ce9 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1696,6 +1696,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) {
return "fadd";
case AtomicRMWInst::FSub:
return "fsub";
+ case AtomicRMWInst::FMax:
+ return "fmax";
+ case AtomicRMWInst::FMin:
+ return "fmin";
case AtomicRMWInst::BAD_BINOP:
return "<invalid operation>";
}
@@ -4423,10 +4427,9 @@ MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
assert(SI.getNumSuccessors() == Weights->size() &&
"num of prof branch_weights must accord with num of successors");
- bool AllZeroes =
- all_of(Weights.getValue(), [](uint32_t W) { return W == 0; });
+ bool AllZeroes = all_of(Weights.value(), [](uint32_t W) { return W == 0; });
- if (AllZeroes || Weights.getValue().size() < 2)
+ if (AllZeroes || Weights.value().size() < 2)
return nullptr;
return MDBuilder(SI.getParent()->getContext()).createBranchWeights(*Weights);
@@ -4460,8 +4463,8 @@ SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) {
// Copy the last case to the place of the removed one and shrink.
// This is tightly coupled with how SwitchInst::removeCase(CaseIt)
// removes the cases.
- Weights.getValue()[I->getCaseIndex() + 1] = Weights.getValue().back();
- Weights.getValue().pop_back();
+ Weights.value()[I->getCaseIndex() + 1] = Weights.value().back();
+ Weights.value().pop_back();
}
return SI.removeCase(I);
}
@@ -4474,10 +4477,10 @@ void SwitchInstProfUpdateWrapper::addCase(
if (!Weights && W && *W) {
Changed = true;
Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
- Weights.getValue()[SI.getNumSuccessors() - 1] = *W;
+ Weights.value()[SI.getNumSuccessors() - 1] = *W;
} else if (Weights) {
Changed = true;
- Weights.getValue().push_back(W.value_or(0));
+ Weights.value().push_back(W.value_or(0));
}
if (Weights)
assert(SI.getNumSuccessors() == Weights->size() &&
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index b132a9dcb812..65a9a32ad2c5 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -223,13 +223,13 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const {
bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const {
Optional<fp::ExceptionBehavior> Except = getExceptionBehavior();
if (Except) {
- if (Except.getValue() != fp::ebIgnore)
+ if (Except.value() != fp::ebIgnore)
return false;
}
Optional<RoundingMode> Rounding = getRoundingMode();
if (Rounding) {
- if (Rounding.getValue() != RoundingMode::NearestTiesToEven)
+ if (Rounding.value() != RoundingMode::NearestTiesToEven)
return false;
}
@@ -364,13 +364,13 @@ VPIntrinsic::getVectorLengthParamPos(Intrinsic::ID IntrinsicID) {
MaybeAlign VPIntrinsic::getPointerAlignment() const {
Optional<unsigned> PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID());
assert(PtrParamOpt && "no pointer argument!");
- return getParamAlign(PtrParamOpt.getValue());
+ return getParamAlign(PtrParamOpt.value());
}
/// \return The pointer operand of this load, store, gather or scatter.
Value *VPIntrinsic::getMemoryPointerParam() const {
if (auto PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID()))
- return getArgOperand(PtrParamOpt.getValue());
+ return getArgOperand(PtrParamOpt.value());
return nullptr;
}
@@ -391,7 +391,7 @@ Value *VPIntrinsic::getMemoryDataParam() const {
auto DataParamOpt = getMemoryDataParamPos(getIntrinsicID());
if (!DataParamOpt)
return nullptr;
- return getArgOperand(DataParamOpt.getValue());
+ return getArgOperand(DataParamOpt.value());
}
Optional<unsigned> VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) {
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 06b3a3afef9d..d7aaf0008564 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -27,7 +27,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <utility>
@@ -241,7 +240,7 @@ void LLVMContextImpl::getSyncScopeNames(
/// singleton OptBisect if not explicitly set.
OptPassGate &LLVMContextImpl::getOptPassGate() const {
if (!OPG)
- OPG = &(*OptBisector);
+ OPG = &getOptBisector();
return *OPG;
}
@@ -260,7 +259,7 @@ bool LLVMContextImpl::getOpaquePointers() {
}
void LLVMContextImpl::setOpaquePointers(bool OP) {
- assert((!OpaquePointers || OpaquePointers.getValue() == OP) &&
+ assert((!OpaquePointers || OpaquePointers.value() == OP) &&
"Cannot change opaque pointers mode once set");
OpaquePointers = OP;
}
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index ae2401026ebf..2a1a514922fd 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -592,13 +592,6 @@ MDNode::Header::~Header() {
(void)(O - 1)->~MDOperand();
}
-void *MDNode::Header::getLargePtr() const {
- static_assert(alignof(LargeStorageVector) <= alignof(Header),
- "LargeStorageVector too strongly aligned");
- return reinterpret_cast<char *>(const_cast<Header *>(this)) -
- sizeof(LargeStorageVector);
-}
-
void *MDNode::Header::getSmallPtr() {
static_assert(alignof(MDOperand) <= alignof(Header),
"MDOperand too strongly aligned");
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 5cd74d53da75..b51ea45f651a 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -714,6 +714,18 @@ void Module::setStackProtectorGuardReg(StringRef Reg) {
addModuleFlag(ModFlagBehavior::Error, "stack-protector-guard-reg", ID);
}
+StringRef Module::getStackProtectorGuardSymbol() const {
+ Metadata *MD = getModuleFlag("stack-protector-guard-symbol");
+ if (auto *MDS = dyn_cast_or_null<MDString>(MD))
+ return MDS->getString();
+ return {};
+}
+
+void Module::setStackProtectorGuardSymbol(StringRef Symbol) {
+ MDString *ID = MDString::get(getContext(), Symbol);
+ addModuleFlag(ModFlagBehavior::Error, "stack-protector-guard-symbol", ID);
+}
+
int Module::getStackProtectorGuardOffset() const {
Metadata *MD = getModuleFlag("stack-protector-guard-offset");
if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD))
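The new accessor pair follows the existing stack-protector module-flag convention: an MDString flag with Error merge behavior, read back through dyn_cast_or_null. A hedged usage sketch (the symbol name is an arbitrary example):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Record the guard symbol as a module flag for later lowering.
  M.setStackProtectorGuardSymbol("__my_stack_chk_guard");
  StringRef S = M.getStackProtectorGuardSymbol(); // "" when the flag is absent
  (void)S;
}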
diff --git a/llvm/lib/IR/OptBisect.cpp b/llvm/lib/IR/OptBisect.cpp
index 418311eac814..c9054dba344a 100644
--- a/llvm/lib/IR/OptBisect.cpp
+++ b/llvm/lib/IR/OptBisect.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
static cl::opt<int> OptBisectLimit("opt-bisect-limit", cl::Hidden,
cl::init(OptBisect::Disabled), cl::Optional,
cl::cb<void, int>([](int Limit) {
- llvm::OptBisector->setLimit(Limit);
+ llvm::getOptBisector().setLimit(Limit);
}),
cl::desc("Maximum optimization to perform"));
@@ -52,4 +52,7 @@ bool OptBisect::checkPass(const StringRef PassName,
const int OptBisect::Disabled;
-ManagedStatic<OptBisect> llvm::OptBisector;
+OptBisect &llvm::getOptBisector() {
+ static OptBisect OptBisector;
+ return OptBisector;
+}
diff --git a/llvm/lib/IR/PassRegistry.cpp b/llvm/lib/IR/PassRegistry.cpp
index 94f607afec47..6c22fcd34769 100644
--- a/llvm/lib/IR/PassRegistry.cpp
+++ b/llvm/lib/IR/PassRegistry.cpp
@@ -15,21 +15,15 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Pass.h"
#include "llvm/PassInfo.h"
-#include "llvm/Support/ManagedStatic.h"
#include <cassert>
#include <memory>
#include <utility>
using namespace llvm;
-// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
-// Unfortunately, passes are registered with static ctors, and having
-// llvm_shutdown clear this map prevents successful resurrection after
-// llvm_shutdown is run. Ideally we should find a solution so that we don't
-// leak the map, AND can still resurrect after shutdown.
-static ManagedStatic<PassRegistry> PassRegistryObj;
PassRegistry *PassRegistry::getPassRegistry() {
- return &*PassRegistryObj;
+ static PassRegistry PassRegistryObj;
+ return &PassRegistryObj;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp
index 71189e79360e..4edaeb74d6a7 100644
--- a/llvm/lib/InterfaceStub/IFSHandler.cpp
+++ b/llvm/lib/InterfaceStub/IFSHandler.cpp
@@ -202,8 +202,8 @@ Error ifs::writeIFSToOutputStream(raw_ostream &OS, const IFSStub &Stub) {
yaml::Output YamlOut(OS, nullptr, /*WrapColumn =*/0);
std::unique_ptr<IFSStubTriple> CopyStub(new IFSStubTriple(Stub));
if (Stub.Target.Arch) {
- CopyStub->Target.ArchString = std::string(
- ELF::convertEMachineToArchName(Stub.Target.Arch.getValue()));
+ CopyStub->Target.ArchString =
+ std::string(ELF::convertEMachineToArchName(Stub.Target.Arch.value()));
}
IFSTarget Target = Stub.Target;
@@ -222,36 +222,35 @@ Error ifs::overrideIFSTarget(IFSStub &Stub, Optional<IFSArch> OverrideArch,
Optional<std::string> OverrideTriple) {
std::error_code OverrideEC(1, std::generic_category());
if (OverrideArch) {
- if (Stub.Target.Arch &&
- Stub.Target.Arch.getValue() != OverrideArch.getValue()) {
+ if (Stub.Target.Arch && Stub.Target.Arch.value() != OverrideArch.value()) {
return make_error<StringError>(
"Supplied Arch conflicts with the text stub", OverrideEC);
}
- Stub.Target.Arch = OverrideArch.getValue();
+ Stub.Target.Arch = OverrideArch.value();
}
if (OverrideEndianness) {
if (Stub.Target.Endianness &&
- Stub.Target.Endianness.getValue() != OverrideEndianness.getValue()) {
+ Stub.Target.Endianness.value() != OverrideEndianness.value()) {
return make_error<StringError>(
"Supplied Endianness conflicts with the text stub", OverrideEC);
}
- Stub.Target.Endianness = OverrideEndianness.getValue();
+ Stub.Target.Endianness = OverrideEndianness.value();
}
if (OverrideBitWidth) {
if (Stub.Target.BitWidth &&
- Stub.Target.BitWidth.getValue() != OverrideBitWidth.getValue()) {
+ Stub.Target.BitWidth.value() != OverrideBitWidth.value()) {
return make_error<StringError>(
"Supplied BitWidth conflicts with the text stub", OverrideEC);
}
- Stub.Target.BitWidth = OverrideBitWidth.getValue();
+ Stub.Target.BitWidth = OverrideBitWidth.value();
}
if (OverrideTriple) {
if (Stub.Target.Triple &&
- Stub.Target.Triple.getValue() != OverrideTriple.getValue()) {
+ Stub.Target.Triple.value() != OverrideTriple.value()) {
return make_error<StringError>(
"Supplied Triple conflicts with the text stub", OverrideEC);
}
- Stub.Target.Triple = OverrideTriple.getValue();
+ Stub.Target.Triple = OverrideTriple.value();
}
return Error::success();
}
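The getValue()-to-value() renames here, and in many later hunks, track llvm::Optional converging on the std::optional spelling. A small sketch of the target API, shown with std::optional itself since the semantics match:

#include <iostream>
#include <optional>

int main() {
  std::optional<int> BitWidth = 64;
  if (BitWidth)                           // explicit has-value check
    std::cout << BitWidth.value() << '\n'; // was getValue()
  std::optional<int> Unset;
  std::cout << Unset.value_or(0) << '\n'; // safe default when empty
}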
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 5d50e92ae377..e248e58e4e4e 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -81,17 +81,19 @@ extern cl::opt<bool> NoPGOWarnMismatch;
exit(1);
}
-Error Config::addSaveTemps(std::string OutputFileName,
- bool UseInputModulePath) {
+Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath,
+ const DenseSet<StringRef> &SaveTempsArgs) {
ShouldDiscardValueNames = false;
std::error_code EC;
- ResolutionFile =
- std::make_unique<raw_fd_ostream>(OutputFileName + "resolution.txt", EC,
- sys::fs::OpenFlags::OF_TextWithCRLF);
- if (EC) {
- ResolutionFile.reset();
- return errorCodeToError(EC);
+ if (SaveTempsArgs.empty() || SaveTempsArgs.contains("resolution")) {
+ ResolutionFile =
+ std::make_unique<raw_fd_ostream>(OutputFileName + "resolution.txt", EC,
+ sys::fs::OpenFlags::OF_TextWithCRLF);
+ if (EC) {
+ ResolutionFile.reset();
+ return errorCodeToError(EC);
+ }
}
auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
@@ -125,14 +127,7 @@ Error Config::addSaveTemps(std::string OutputFileName,
};
};
- setHook("0.preopt", PreOptModuleHook);
- setHook("1.promote", PostPromoteModuleHook);
- setHook("2.internalize", PostInternalizeModuleHook);
- setHook("3.import", PostImportModuleHook);
- setHook("4.opt", PostOptModuleHook);
- setHook("5.precodegen", PreCodeGenModuleHook);
-
- CombinedIndexHook =
+ auto SaveCombinedIndex =
[=](const ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
std::string Path = OutputFileName + "index.bc";
@@ -152,6 +147,31 @@ Error Config::addSaveTemps(std::string OutputFileName,
return true;
};
+ if (SaveTempsArgs.empty()) {
+ setHook("0.preopt", PreOptModuleHook);
+ setHook("1.promote", PostPromoteModuleHook);
+ setHook("2.internalize", PostInternalizeModuleHook);
+ setHook("3.import", PostImportModuleHook);
+ setHook("4.opt", PostOptModuleHook);
+ setHook("5.precodegen", PreCodeGenModuleHook);
+ CombinedIndexHook = SaveCombinedIndex;
+ } else {
+ if (SaveTempsArgs.contains("preopt"))
+ setHook("0.preopt", PreOptModuleHook);
+ if (SaveTempsArgs.contains("promote"))
+ setHook("1.promote", PostPromoteModuleHook);
+ if (SaveTempsArgs.contains("internalize"))
+ setHook("2.internalize", PostInternalizeModuleHook);
+ if (SaveTempsArgs.contains("import"))
+ setHook("3.import", PostImportModuleHook);
+ if (SaveTempsArgs.contains("opt"))
+ setHook("4.opt", PostOptModuleHook);
+ if (SaveTempsArgs.contains("precodegen"))
+ setHook("5.precodegen", PreCodeGenModuleHook);
+ if (SaveTempsArgs.contains("combinedindex"))
+ CombinedIndexHook = SaveCombinedIndex;
+ }
+
return Error::success();
}
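The reworked addSaveTemps keeps the old behavior when no stage names are given and otherwise installs hooks only for the requested stages. A self-contained sketch of that selection rule (simplified types, not the real LTO Config API):

#include <iostream>
#include <set>
#include <string>

// An empty argument set means "save every stage" (the pre-existing
// behavior); otherwise only explicitly named stages are dumped.
static bool shouldSave(const std::set<std::string> &Args,
                       const std::string &Stage) {
  return Args.empty() || Args.count(Stage) != 0;
}

int main() {
  std::set<std::string> All;               // empty => everything
  std::set<std::string> OnlyOpt = {"opt"}; // just the post-opt module
  std::cout << shouldSave(All, "preopt")     // 1
            << shouldSave(OnlyOpt, "preopt") // 0
            << shouldSave(OnlyOpt, "opt")    // 1
            << '\n';
}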
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 5a819e2d736c..9e89cce8312e 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1357,11 +1357,18 @@ Error IRLinker::linkModuleFlagsMetadata() {
DstM.getModuleIdentifier() + "'");
}
- auto replaceDstValue = [&](MDNode *New) {
+ auto ensureDistinctOp = [&](MDNode *DstValue) {
+ assert(isa<MDTuple>(DstValue) &&
+ "Expected MDTuple when appending module flags");
+ if (DstValue->isDistinct())
+ return dyn_cast<MDTuple>(DstValue);
+ MDTuple *New = MDTuple::getDistinct(
+ DstM.getContext(), SmallVector<Metadata *, 4>(DstValue->operands()));
Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
- MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
+ MDNode *Flag = MDTuple::getDistinct(DstM.getContext(), FlagOps);
DstModFlags->setOperand(DstIndex, Flag);
Flags[ID].first = Flag;
+ return New;
};
// Emit a warning if the values differ and either source or destination
@@ -1438,25 +1445,20 @@ Error IRLinker::linkModuleFlagsMetadata() {
break;
}
case Module::Append: {
- MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDTuple *DstValue = ensureDistinctOp(cast<MDNode>(DstOp->getOperand(2)));
MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
- SmallVector<Metadata *, 8> MDs;
- MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
- MDs.append(DstValue->op_begin(), DstValue->op_end());
- MDs.append(SrcValue->op_begin(), SrcValue->op_end());
-
- replaceDstValue(MDNode::get(DstM.getContext(), MDs));
+ for (const auto &O : SrcValue->operands())
+ DstValue->push_back(O);
break;
}
case Module::AppendUnique: {
SmallSetVector<Metadata *, 16> Elts;
- MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDTuple *DstValue = ensureDistinctOp(cast<MDNode>(DstOp->getOperand(2)));
MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
Elts.insert(DstValue->op_begin(), DstValue->op_end());
Elts.insert(SrcValue->op_begin(), SrcValue->op_end());
-
- replaceDstValue(MDNode::get(DstM.getContext(),
- makeArrayRef(Elts.begin(), Elts.end())));
+ for (auto I = DstValue->getNumOperands(); I < Elts.size(); I++)
+ DstValue->push_back(Elts[I]);
break;
}
}
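Both Append cases now grow the destination tuple in place instead of rebuilding it, preserving the node identity that replaceDstValue used to discard. A sketch of the idiom, using only calls that appear in the hunk above (getDistinct plus push_back, the latter being valid only on distinct nodes):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  MDString *A = MDString::get(Ctx, "a");
  MDString *B = MDString::get(Ctx, "b");
  MDTuple *Dst = MDTuple::getDistinct(Ctx, {A}); // distinct => mutable
  MDNode *Src = MDTuple::get(Ctx, {B});          // uniqued source node
  for (const MDOperand &O : Src->operands())
    Dst->push_back(O); // in-place append; uniqued nodes are immutable
  outs() << Dst->getNumOperands() << '\n'; // prints 2
}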
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index eda495693595..78204ffe4c3b 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -145,7 +145,7 @@ struct ELFWriter {
uint64_t align(unsigned Alignment);
bool maybeWriteCompression(uint64_t Size,
- SmallVectorImpl<char> &CompressedContents,
+ SmallVectorImpl<uint8_t> &CompressedContents,
bool ZLibStyle, unsigned Alignment);
public:
@@ -819,7 +819,7 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
// Include the debug info compression header.
bool ELFWriter::maybeWriteCompression(
- uint64_t Size, SmallVectorImpl<char> &CompressedContents, bool ZLibStyle,
+ uint64_t Size, SmallVectorImpl<uint8_t> &CompressedContents, bool ZLibStyle,
unsigned Alignment) {
if (ZLibStyle) {
uint64_t HdrSize =
@@ -875,9 +875,11 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
raw_svector_ostream VecOS(UncompressedData);
Asm.writeSectionData(VecOS, &Section, Layout);
- SmallVector<char, 128> CompressedContents;
- zlib::compress(StringRef(UncompressedData.data(), UncompressedData.size()),
- CompressedContents);
+ SmallVector<uint8_t, 128> CompressedContents;
+ compression::zlib::compress(
+ makeArrayRef(reinterpret_cast<uint8_t *>(UncompressedData.data()),
+ UncompressedData.size()),
+ CompressedContents);
bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z;
if (!maybeWriteCompression(UncompressedData.size(), CompressedContents,
@@ -896,7 +898,7 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
// Add "z" prefix to section name. This is zlib-gnu style.
MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str());
}
- W.OS << CompressedContents;
+ W.OS << toStringRef(CompressedContents);
}
void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 4be84ca7feb5..d312e3521c9e 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -773,7 +773,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection(
// Do the lookup. If we have a hit, return it.
auto IterBool = XCOFFUniquingMap.insert(std::make_pair(
IsDwarfSec
- ? XCOFFSectionKey(Section.str(), DwarfSectionSubtypeFlags.getValue())
+ ? XCOFFSectionKey(Section.str(), DwarfSectionSubtypeFlags.value())
: XCOFFSectionKey(Section.str(), CsectProp->MappingClass),
nullptr));
auto &Entry = *IterBool.first;
@@ -806,7 +806,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection(
if (IsDwarfSec)
Result = new (XCOFFAllocator.Allocate())
MCSectionXCOFF(QualName->getUnqualifiedName(), Kind, QualName,
- DwarfSectionSubtypeFlags.getValue(), Begin, CachedName,
+ DwarfSectionSubtypeFlags.value(), Begin, CachedName,
MultiSymbolsAllowed);
else
Result = new (XCOFFAllocator.Allocate())
diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
index 0c041186936d..cf98cb8ff59f 100644
--- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -88,8 +88,8 @@ bool XCOFFSymbolInfo::operator<(const XCOFFSymbolInfo &SymInfo) const {
return SymInfo.StorageMappingClass.has_value();
if (StorageMappingClass) {
- return getSMCPriority(StorageMappingClass.getValue()) <
- getSMCPriority(SymInfo.StorageMappingClass.getValue());
+ return getSMCPriority(StorageMappingClass.value()) <
+ getSMCPriority(SymInfo.StorageMappingClass.value());
}
return false;
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 04a234be3b47..563d3487ef50 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -660,6 +660,8 @@ EndStmt:
Type = ELF::SHT_LLVM_SYMPART;
else if (TypeName == "llvm_bb_addr_map")
Type = ELF::SHT_LLVM_BB_ADDR_MAP;
+ else if (TypeName == "llvm_offloading")
+ Type = ELF::SHT_LLVM_OFFLOADING;
else if (TypeName.getAsInteger(0, Type))
return TokError("unknown section type");
}
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 8c582d225e30..694ea395fdec 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1585,6 +1585,16 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
return false;
}
+ // Parse directional local label references.
+ if (Identifier.equals_insensitive("@b") ||
+ Identifier.equals_insensitive("@f")) {
+ bool Before = Identifier.equals_insensitive("@b");
+ MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
+ if (Before && Sym->isUndefined())
+ return Error(FirstTokenLoc, "Expected @@ label before @B reference");
+ Res = MCSymbolRefExpr::create(Sym, getContext());
+ return false;
+ }

// Parse symbol variant.
std::pair<StringRef, StringRef> Split;
if (!MAI.useParensForSymbolVariant()) {
@@ -1714,34 +1724,10 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
case AsmToken::BigNum:
return TokError("literal value out of range for directive");
case AsmToken::Integer: {
- SMLoc Loc = getTok().getLoc();
int64_t IntVal = getTok().getIntVal();
Res = MCConstantExpr::create(IntVal, getContext());
EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat token.
- // Look for 'b' or 'f' following an Integer as a directional label.
- if (Lexer.getKind() == AsmToken::Identifier) {
- StringRef IDVal = getTok().getString();
- // Look up the symbol variant if used.
- std::pair<StringRef, StringRef> Split = IDVal.split('@');
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- if (Split.first.size() != IDVal.size()) {
- Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
- if (Variant == MCSymbolRefExpr::VK_Invalid)
- return TokError("invalid variant '" + Split.second + "'");
- IDVal = Split.first;
- }
- if (IDVal == "f" || IDVal == "b") {
- MCSymbol *Sym =
- Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
- Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
- if (IDVal == "b" && Sym->isUndefined())
- return Error(Loc, "directional label undefined");
- DirLabels.push_back(std::make_tuple(Loc, CppHashInfo, Sym));
- EndLoc = Lexer.getTok().getEndLoc();
- Lex(); // Eat identifier.
- }
- }
return false;
}
case AsmToken::String: {
@@ -2042,6 +2028,9 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
.CaseLower("and", AsmToken::Amp)
.CaseLower("not", AsmToken::Exclaim)
.CaseLower("or", AsmToken::Pipe)
+ .CaseLower("xor", AsmToken::Caret)
+ .CaseLower("shl", AsmToken::LessLess)
+ .CaseLower("shr", AsmToken::GreaterGreater)
.CaseLower("eq", AsmToken::EqualEqual)
.CaseLower("ne", AsmToken::ExclaimEqual)
.CaseLower("lt", AsmToken::Less)
@@ -2110,29 +2099,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
- int64_t LocalLabelVal = -1;
if (Lexer.is(AsmToken::HashDirective))
return parseCppHashLineFilenameComment(IDLoc);
- // Allow an integer followed by a ':' as a directional local label.
- if (Lexer.is(AsmToken::Integer)) {
- LocalLabelVal = getTok().getIntVal();
- if (LocalLabelVal < 0) {
- if (!TheCondState.Ignore) {
- Lex(); // always eat a token
- return Error(IDLoc, "unexpected token at start of statement");
- }
- IDVal = "";
- } else {
- IDVal = getTok().getString();
- Lex(); // Consume the integer token to be used as an identifier token.
- if (Lexer.getKind() != AsmToken::Colon) {
- if (!TheCondState.Ignore) {
- Lex(); // always eat a token
- return Error(IDLoc, "unexpected token at start of statement");
- }
- }
- }
- } else if (Lexer.is(AsmToken::Dot)) {
+ if (Lexer.is(AsmToken::Dot)) {
// Treat '.' as a valid identifier in this context.
Lex();
IDVal = ".";
@@ -2257,19 +2226,22 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
// FIXME: This doesn't diagnose assignment to a symbol which has been
// implicitly marked as external.
MCSymbol *Sym;
- if (LocalLabelVal == -1) {
- if (ParsingMSInlineAsm && SI) {
- StringRef RewrittenLabel =
- SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
- assert(!RewrittenLabel.empty() &&
- "We should have an internal name here.");
- Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
- RewrittenLabel);
- IDVal = RewrittenLabel;
- }
+ if (ParsingMSInlineAsm && SI) {
+ StringRef RewrittenLabel =
+ SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
+ assert(!RewrittenLabel.empty() &&
+ "We should have an internal name here.");
+ Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
+ RewrittenLabel);
+ IDVal = RewrittenLabel;
+ }
+ // Handle directional local labels.
+ if (IDVal == "@@") {
+ Sym = Ctx.createDirectionalLocalSymbol(0);
+ } else {
Sym = getContext().getOrCreateSymbol(IDVal);
- } else
- Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
+ }
+
// End of Labels should be treated as end of line for lexing
// purposes but that information is not available to the Lexer who
// does not understand Labels. This may cause us to see a Hash
@@ -4241,7 +4213,7 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure,
size_t FieldIndex = 0;
if (EndToken) {
// Initialize all fields with given initializers.
- while (getTok().isNot(EndToken.getValue()) &&
+ while (getTok().isNot(EndToken.value()) &&
FieldIndex < Structure.Fields.size()) {
const FieldInfo &Field = Structure.Fields[FieldIndex++];
if (parseOptionalToken(AsmToken::Comma)) {
@@ -4273,10 +4245,10 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure,
}
if (EndToken) {
- if (EndToken.getValue() == AsmToken::Greater)
+ if (EndToken.value() == AsmToken::Greater)
return parseAngleBracketClose();
- return parseToken(EndToken.getValue());
+ return parseToken(EndToken.value());
}
return false;
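The MasmParser hunks replace the GAS-style numeric locals (1:, 1b, 1f) with MASM anonymous labels: @@ defines one, @B resolves to the nearest definition before the reference, @F to the nearest one after, and @B before any definition is an error. A self-contained model of just that resolution rule (the real parser delegates to MCContext's directional-local-symbol machinery):

#include <iostream>
#include <vector>

// DefLines holds the line numbers of "@@" definitions, in order; -1 models
// the "undefined" case the patch reports for a dangling "@B".
static int resolve(const std::vector<int> &DefLines, int RefLine,
                   bool Before) {
  int Found = -1;
  for (int L : DefLines) {
    if (Before && L < RefLine)
      Found = L; // keep the latest definition still before RefLine
    else if (!Before && L > RefLine)
      return L; // first definition after RefLine wins
  }
  return Found;
}

int main() {
  std::vector<int> Defs = {3, 10}; // "@@" on lines 3 and 10
  std::cout << resolve(Defs, 7, /*Before=*/true) << ' '    // @B -> 3
            << resolve(Defs, 7, /*Before=*/false) << '\n'; // @F -> 10
}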
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index 98eb7eada064..71c8e6f02f8e 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -96,10 +96,10 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
continue;
unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
double Temp = NumUnits * 1.0 / I->Cycles;
- Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
+ Throughput = Throughput ? std::min(Throughput.value(), Temp) : Temp;
}
if (Throughput)
- return 1.0 / Throughput.getValue();
+ return 1.0 / Throughput.value();
// If no throughput value was calculated, assume that we can execute at the
// maximum issue width scaled by number of micro-ops for the schedule class.
@@ -140,10 +140,10 @@ MCSchedModel::getReciprocalThroughput(unsigned SchedClass,
if (!I->getCycles())
continue;
double Temp = countPopulation(I->getUnits()) * 1.0 / I->getCycles();
- Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
+ Throughput = Throughput ? std::min(Throughput.value(), Temp) : Temp;
}
if (Throughput)
- return 1.0 / Throughput.getValue();
+ return 1.0 / Throughput.value();
// If there are no execution resources specified for this class, then assume
// that it can execute at the maximum default issue width.
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index 27dc1826819b..077cee132338 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -167,6 +167,8 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << "llvm_bb_addr_map";
else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP_V0)
OS << "llvm_bb_addr_map_v0";
+ else if (Type == ELF::SHT_LLVM_OFFLOADING)
+ OS << "llvm_offloading";
else
report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
" for section " + getName());
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
index ee8fa04c421f..9a35ac69c47c 100644
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -110,8 +110,8 @@ void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
// XCOFF debug sections.
if (getKind().isMetadata() && isDwarfSect()) {
- OS << "\n\t.dwsect "
- << format("0x%" PRIx32, getDwarfSubtypeFlags().getValue()) << '\n';
+ OS << "\n\t.dwsect " << format("0x%" PRIx32, getDwarfSubtypeFlags().value())
+ << '\n';
OS << MAI.getPrivateLabelPrefix() << getName() << ':' << '\n';
return;
}
diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp
index 9d8883a15c0b..77321829e614 100644
--- a/llvm/lib/ObjCopy/ConfigManager.cpp
+++ b/llvm/lib/ObjCopy/ConfigManager.cpp
@@ -20,9 +20,9 @@ Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
!Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
!Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
!Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
- Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
- Common.DecompressDebugSections ||
+ !Common.SetSectionType.empty() || Common.ExtractDWO ||
+ Common.PreserveDates || Common.StripDWO || Common.StripNonAlloc ||
+ Common.StripSections || Common.Weaken || Common.DecompressDebugSections ||
Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
return createStringError(llvm::errc::invalid_argument,
"option is not supported for COFF");
@@ -38,9 +38,10 @@ Expected<const MachOConfig &> ConfigManager::getMachOConfig() const {
!Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
!Common.UnneededSymbolsToRemove.empty() ||
!Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
- Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU ||
- Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
- Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded ||
+ !Common.SetSectionType.empty() || Common.ExtractDWO ||
+ Common.PreserveDates || Common.StripAllGNU || Common.StripDWO ||
+ Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
+ Common.DecompressDebugSections || Common.StripUnneeded ||
Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
return createStringError(llvm::errc::invalid_argument,
"option is not supported for MachO");
@@ -58,7 +59,8 @@ Expected<const WasmConfig &> ConfigManager::getWasmConfig() const {
!Common.UnneededSymbolsToRemove.empty() ||
!Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
!Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty())
+ !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() ||
+ !Common.SymbolsToRename.empty())
return createStringError(llvm::errc::invalid_argument,
"only flags for section dumping, removal, and "
"addition are supported");
@@ -79,12 +81,12 @@ Expected<const XCOFFConfig &> ConfigManager::getXCOFFConfig() const {
!Common.UnneededSymbolsToRemove.empty() ||
!Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
!Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty() ||
- Common.ExtractDWO || Common.ExtractMainPartition ||
- Common.OnlyKeepDebug || Common.PreserveDates || Common.StripAllGNU ||
- Common.StripDWO || Common.StripDebug || Common.StripNonAlloc ||
- Common.StripSections || Common.Weaken || Common.StripUnneeded ||
- Common.DecompressDebugSections) {
+ !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() ||
+ !Common.SymbolsToRename.empty() || Common.ExtractDWO ||
+ Common.ExtractMainPartition || Common.OnlyKeepDebug ||
+ Common.PreserveDates || Common.StripAllGNU || Common.StripDWO ||
+ Common.StripDebug || Common.StripNonAlloc || Common.StripSections ||
+ Common.Weaken || Common.StripUnneeded || Common.DecompressDebugSections) {
return createStringError(
llvm::errc::invalid_argument,
"no flags are supported yet, only basic copying is allowed");
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 2d388f8a867e..781be3d8aeb1 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -600,8 +600,8 @@ handleUserSection(const NewSectionInfo &NewSection,
static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
Object &Obj) {
if (Config.OutputArch) {
- Obj.Machine = Config.OutputArch.getValue().EMachine;
- Obj.OSABI = Config.OutputArch.getValue().OSABI;
+ Obj.Machine = Config.OutputArch.value().EMachine;
+ Obj.OSABI = Config.OutputArch.value().OSABI;
}
if (!Config.SplitDWO.empty() && Config.ExtractDWO) {
@@ -629,6 +629,66 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj))
return E;
+ if (!Config.SetSectionAlignment.empty()) {
+ for (SectionBase &Sec : Obj.sections()) {
+ auto I = Config.SetSectionAlignment.find(Sec.Name);
+ if (I != Config.SetSectionAlignment.end())
+ Sec.Align = I->second;
+ }
+ }
+
+ if (Config.OnlyKeepDebug)
+ for (auto &Sec : Obj.sections())
+ if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE)
+ Sec.Type = SHT_NOBITS;
+
+ for (const NewSectionInfo &AddedSection : Config.AddSection) {
+ auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+ OwnedDataSection &NewSection =
+ Obj.addSection<OwnedDataSection>(Name, Data);
+ if (Name.startswith(".note") && Name != ".note.GNU-stack")
+ NewSection.Type = SHT_NOTE;
+ return Error::success();
+ };
+ if (Error E = handleUserSection(AddedSection, AddSection))
+ return E;
+ }
+
+ for (const NewSectionInfo &NewSection : Config.UpdateSection) {
+ auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+ return Obj.updateSection(Name, Data);
+ };
+ if (Error E = handleUserSection(NewSection, UpdateSection))
+ return E;
+ }
+
+ if (!Config.AddGnuDebugLink.empty())
+ Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink,
+ Config.GnuDebugLinkCRC32);
+
+ // If the symbol table was previously removed, we need to create a new one
+ // before adding new symbols.
+ if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty())
+ if (Error E = Obj.addNewSymbolTable())
+ return E;
+
+ for (const NewSymbolInfo &SI : Config.SymbolsToAdd)
+ addSymbol(Obj, SI, ELFConfig.NewSymbolVisibility);
+
+ // --set-section-{flags,type} work with sections added by --add-section.
+ if (!Config.SetSectionFlags.empty() || !Config.SetSectionType.empty()) {
+ for (auto &Sec : Obj.sections()) {
+ const auto Iter = Config.SetSectionFlags.find(Sec.Name);
+ if (Iter != Config.SetSectionFlags.end()) {
+ const SectionFlagsUpdate &SFU = Iter->second;
+ setSectionFlagsAndType(Sec, SFU.NewFlags);
+ }
+ auto It2 = Config.SetSectionType.find(Sec.Name);
+ if (It2 != Config.SetSectionType.end())
+ Sec.Type = It2->second;
+ }
+ }
+
if (!Config.SectionsToRename.empty()) {
std::vector<RelocationSectionBase *> RelocSections;
DenseSet<SectionBase *> RenamedSections;
@@ -639,7 +699,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
const SectionRename &SR = Iter->second;
Sec.Name = std::string(SR.NewName);
if (SR.NewFlags)
- setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
+ setSectionFlagsAndType(Sec, SR.NewFlags.value());
RenamedSections.insert(&Sec);
} else if (RelocSec && !(Sec.Flags & SHF_ALLOC))
// Postpone processing relocation sections which are not specified in
@@ -693,63 +753,6 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
}
}
- if (!Config.SetSectionAlignment.empty()) {
- for (SectionBase &Sec : Obj.sections()) {
- auto I = Config.SetSectionAlignment.find(Sec.Name);
- if (I != Config.SetSectionAlignment.end())
- Sec.Align = I->second;
- }
- }
-
- if (Config.OnlyKeepDebug)
- for (auto &Sec : Obj.sections())
- if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE)
- Sec.Type = SHT_NOBITS;
-
- for (const NewSectionInfo &AddedSection : Config.AddSection) {
- auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
- OwnedDataSection &NewSection =
- Obj.addSection<OwnedDataSection>(Name, Data);
- if (Name.startswith(".note") && Name != ".note.GNU-stack")
- NewSection.Type = SHT_NOTE;
- return Error::success();
- };
- if (Error E = handleUserSection(AddedSection, AddSection))
- return E;
- }
-
- for (const NewSectionInfo &NewSection : Config.UpdateSection) {
- auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
- return Obj.updateSection(Name, Data);
- };
- if (Error E = handleUserSection(NewSection, UpdateSection))
- return E;
- }
-
- if (!Config.AddGnuDebugLink.empty())
- Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink,
- Config.GnuDebugLinkCRC32);
-
- // If the symbol table was previously removed, we need to create a new one
- // before adding new symbols.
- if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty())
- if (Error E = Obj.addNewSymbolTable())
- return E;
-
- for (const NewSymbolInfo &SI : Config.SymbolsToAdd)
- addSymbol(Obj, SI, ELFConfig.NewSymbolVisibility);
-
- // --set-section-flags works with sections added by --add-section.
- if (!Config.SetSectionFlags.empty()) {
- for (auto &Sec : Obj.sections()) {
- const auto Iter = Config.SetSectionFlags.find(Sec.Name);
- if (Iter != Config.SetSectionFlags.end()) {
- const SectionFlagsUpdate &SFU = Iter->second;
- setSectionFlagsAndType(Sec, SFU.NewFlags);
- }
- }
- }
-
if (ELFConfig.EntryExpr)
Obj.Entry = ELFConfig.EntryExpr(Obj.Entry);
return Error::success();
@@ -808,7 +811,7 @@ Error objcopy::elf::executeObjcopyOnBinary(const CommonConfig &Config,
return Obj.takeError();
// Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
const ElfType OutputElfType =
- Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue())
+ Config.OutputArch ? getOutputElfType(Config.OutputArch.value())
: getOutputElfType(In);
if (Error E = handleArgs(Config, ELFConfig, **Obj))
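handleArgs now applies --set-section-type through the same per-section sweep as --set-section-flags, and the whole block was hoisted so it also sees sections created by --add-section. A simplified model of the map-driven sweep (stand-in types, not the real llvm::objcopy classes):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Section {
  std::string Name;
  uint32_t Type = 1; // SHT_PROGBITS
};

int main() {
  std::vector<Section> Sections = {{".foo"}, {".bar"}};
  std::map<std::string, uint32_t> SetSectionType = {{".foo", 7}}; // SHT_NOTE
  for (Section &Sec : Sections) {
    auto It = SetSectionType.find(Sec.Name);
    if (It != SetSectionType.end())
      Sec.Type = It->second; // --set-section-type=<name>=<type>
  }
  for (const Section &Sec : Sections)
    std::cout << Sec.Name << " -> " << Sec.Type << '\n'; // .foo -> 7
}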
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index b241bd817ff5..f0e4f91cd347 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -463,13 +463,12 @@ Error ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
? (ZlibGnuMagic.size() + sizeof(Sec.Size))
: sizeof(Elf_Chdr_Impl<ELFT>);
- StringRef CompressedContent(
- reinterpret_cast<const char *>(Sec.OriginalData.data()) + DataOffset,
- Sec.OriginalData.size() - DataOffset);
-
- SmallVector<char, 128> DecompressedContent;
- if (Error Err = zlib::uncompress(CompressedContent, DecompressedContent,
- static_cast<size_t>(Sec.Size)))
+ ArrayRef<uint8_t> CompressedContent(Sec.OriginalData.data() + DataOffset,
+ Sec.OriginalData.size() - DataOffset);
+ SmallVector<uint8_t, 128> DecompressedContent;
+ if (Error Err =
+ compression::zlib::uncompress(CompressedContent, DecompressedContent,
+ static_cast<size_t>(Sec.Size)))
return createStringError(errc::invalid_argument,
"'" + Sec.Name + "': " + toString(std::move(Err)));
@@ -544,9 +543,7 @@ CompressedSection::CompressedSection(const SectionBase &Sec,
DebugCompressionType CompressionType)
: SectionBase(Sec), CompressionType(CompressionType),
DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
- zlib::compress(StringRef(reinterpret_cast<const char *>(OriginalData.data()),
- OriginalData.size()),
- CompressedData);
+ compression::zlib::compress(OriginalData, CompressedData);
assert(CompressionType != DebugCompressionType::None);
Flags |= ELF::SHF_COMPRESSED;
@@ -2643,9 +2640,12 @@ Error BinaryWriter::finalize() {
// MinAddr will be skipped.
uint64_t MinAddr = UINT64_MAX;
for (SectionBase &Sec : Obj.allocSections()) {
+ // If Sec's type is changed from SHT_NOBITS due to --set-section-flags,
+ // Offset may not be aligned. Align it to max(Align, 1).
if (Sec.ParentSegment != nullptr)
- Sec.Addr =
- Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr;
+ Sec.Addr = alignTo(Sec.Offset - Sec.ParentSegment->Offset +
+ Sec.ParentSegment->PAddr,
+ std::max(Sec.Align, uint64_t(1)));
if (Sec.Type != SHT_NOBITS && Sec.Size > 0)
MinAddr = std::min(MinAddr, Sec.Addr);
}
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index f33bbb029c9b..799db5034532 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -539,7 +539,7 @@ class CompressedSection : public SectionBase {
DebugCompressionType CompressionType;
uint64_t DecompressedSize;
uint64_t DecompressedAlign;
- SmallVector<char, 128> CompressedData;
+ SmallVector<uint8_t, 128> CompressedData;
public:
CompressedSection(const SectionBase &Sec,
diff --git a/llvm/lib/Object/Decompressor.cpp b/llvm/lib/Object/Decompressor.cpp
index de067ed59ac5..a6a28a0589ac 100644
--- a/llvm/lib/Object/Decompressor.cpp
+++ b/llvm/lib/Object/Decompressor.cpp
@@ -19,7 +19,7 @@ using namespace object;
Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data,
bool IsLE, bool Is64Bit) {
- if (!zlib::isAvailable())
+ if (!compression::zlib::isAvailable())
return createError("zlib is not available");
Decompressor D(Data);
@@ -92,7 +92,8 @@ bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) {
return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name);
}
-Error Decompressor::decompress(MutableArrayRef<char> Buffer) {
+Error Decompressor::decompress(MutableArrayRef<uint8_t> Buffer) {
size_t Size = Buffer.size();
- return zlib::uncompress(SectionData, Buffer.data(), Size);
+ return compression::zlib::uncompress(arrayRefFromStringRef(SectionData),
+ Buffer.data(), Size);
}
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 6acf4543be5a..0d5aa91c1348 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -297,6 +297,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP_V0);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_OFFLOADING);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 38de669f1d3d..1f342e55e77f 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -168,11 +168,11 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
Optional<unsigned> Attr =
Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
if (Attr)
- isV7 = Attr.getValue() == ARMBuildAttrs::v7;
+ isV7 = Attr.value() == ARMBuildAttrs::v7;
Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
if (Attr) {
- switch (Attr.getValue()) {
+ switch (Attr.value()) {
case ARMBuildAttrs::ApplicationProfile:
Features.AddFeature("aclass");
break;
@@ -191,7 +191,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
Attr = Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
if (Attr) {
- switch (Attr.getValue()) {
+ switch (Attr.value()) {
default:
break;
case ARMBuildAttrs::Not_Allowed:
@@ -206,7 +206,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
Attr = Attributes.getAttributeValue(ARMBuildAttrs::FP_arch);
if (Attr) {
- switch (Attr.getValue()) {
+ switch (Attr.value()) {
default:
break;
case ARMBuildAttrs::Not_Allowed:
@@ -230,7 +230,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
Attr = Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch);
if (Attr) {
- switch (Attr.getValue()) {
+ switch (Attr.value()) {
default:
break;
case ARMBuildAttrs::Not_Allowed:
@@ -249,7 +249,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
Attr = Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch);
if (Attr) {
- switch (Attr.getValue()) {
+ switch (Attr.value()) {
default:
break;
case ARMBuildAttrs::Not_Allowed:
@@ -268,7 +268,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
Attr = Attributes.getAttributeValue(ARMBuildAttrs::DIV_use);
if (Attr) {
- switch (Attr.getValue()) {
+ switch (Attr.value()) {
default:
break;
case ARMBuildAttrs::DisallowDIV:
@@ -524,7 +524,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
Optional<unsigned> Attr =
Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
if (Attr) {
- switch (Attr.getValue()) {
+ switch (Attr.value()) {
case ARMBuildAttrs::v4:
Triple += "v4";
break;
@@ -556,7 +556,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
Optional<unsigned> ArchProfileAttr =
Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
if (ArchProfileAttr &&
- ArchProfileAttr.getValue() == ARMBuildAttrs::MicroControllerProfile)
+ ArchProfileAttr.value() == ARMBuildAttrs::MicroControllerProfile)
Triple += "v7m";
else
Triple += "v7";
diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp
index 6d1e3f2a59d0..62cb51ca09e4 100644
--- a/llvm/lib/Object/Error.cpp
+++ b/llvm/lib/Object/Error.cpp
@@ -13,7 +13,6 @@
#include "llvm/Object/Error.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
using namespace object;
@@ -75,10 +74,9 @@ void GenericBinaryError::log(raw_ostream &OS) const {
OS << Msg;
}
-static ManagedStatic<_object_error_category> error_category;
-
const std::error_category &object::object_category() {
- return *error_category;
+ static _object_error_category error_category;
+ return error_category;
}
llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) {
diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
index 9834b036de90..60870bbb801f 100644
--- a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
@@ -133,17 +133,17 @@ void DXContainerWriter::writeParts(raw_ostream &OS) {
// Compute the optional fields if needed...
if (P.Program->DXILOffset)
- Header.Bitcode.Offset = P.Program->DXILOffset.getValue();
+ Header.Bitcode.Offset = P.Program->DXILOffset.value();
else
Header.Bitcode.Offset = sizeof(dxbc::BitcodeHeader);
if (P.Program->DXILSize)
- Header.Bitcode.Size = P.Program->DXILSize.getValue();
+ Header.Bitcode.Size = P.Program->DXILSize.value();
else
Header.Bitcode.Size = P.Program->DXIL ? P.Program->DXIL->size() : 0;
if (P.Program->Size)
- Header.Size = P.Program->Size.getValue();
+ Header.Size = P.Program->Size.value();
else
Header.Size = sizeof(dxbc::ProgramHeader) + Header.Bitcode.Size;
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index cdd180cdc15d..b778006cf66e 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -656,6 +656,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_LLVM_PART_PHDR);
ECase(SHT_LLVM_BB_ADDR_MAP_V0);
ECase(SHT_LLVM_BB_ADDR_MAP);
+ ECase(SHT_LLVM_OFFLOADING);
ECase(SHT_GNU_ATTRIBUTES);
ECase(SHT_GNU_HASH);
ECase(SHT_GNU_verdef);
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a5345172aae1..593243144f01 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1728,8 +1728,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// in ICP (which is performed earlier than this in the regular LTO pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- // Enable splitting late in the FullLTO post-link pipeline. This is done in
- // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+ // Enable splitting late in the FullLTO post-link pipeline.
if (EnableHotColdSplit)
MPM.addPass(HotColdSplittingPass());
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index ab9f8bf9c957..bad8184dffcf 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -901,10 +901,11 @@ bool OptNoneInstrumentation::shouldRun(StringRef PassID, Any IR) {
void OptBisectInstrumentation::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
- if (!OptBisector->isEnabled())
+ if (!getOptBisector().isEnabled())
return;
PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) {
- return isIgnored(PassID) || OptBisector->checkPass(PassID, getIRName(IR));
+ return isIgnored(PassID) ||
+ getOptBisector().checkPass(PassID, getIRName(IR));
});
}
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index f9e58fd6afa5..f4f13bafb233 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -897,10 +896,9 @@ std::string CoverageMapError::message() const {
return getCoverageMapErrString(Err);
}
-static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory;
-
const std::error_category &llvm::coverage::coveragemap_category() {
- return *ErrorCategory;
+ static CoverageMappingErrorCategoryType ErrorCategory;
+ return ErrorCategory;
}
char CoverageMapError::ID = 0;
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 1a187795a8a0..552140a52ad4 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -119,26 +119,26 @@ Error RawCoverageFilenamesReader::read(CovMapVersion Version) {
return Err;
if (CompressedLen > 0) {
- if (!zlib::isAvailable())
+ if (!compression::zlib::isAvailable())
return make_error<CoverageMapError>(
coveragemap_error::decompression_failed);
// Allocate memory for the decompressed filenames.
- SmallVector<char, 0> StorageBuf;
+ SmallVector<uint8_t, 0> StorageBuf;
// Read compressed filenames.
StringRef CompressedFilenames = Data.substr(0, CompressedLen);
Data = Data.substr(CompressedLen);
- auto Err =
- zlib::uncompress(CompressedFilenames, StorageBuf, UncompressedLen);
+ auto Err = compression::zlib::uncompress(
+ arrayRefFromStringRef(CompressedFilenames), StorageBuf,
+ UncompressedLen);
if (Err) {
consumeError(std::move(Err));
return make_error<CoverageMapError>(
coveragemap_error::decompression_failed);
}
- StringRef UncompressedFilenames(StorageBuf.data(), StorageBuf.size());
- RawCoverageFilenamesReader Delegate(UncompressedFilenames, Filenames,
+ RawCoverageFilenamesReader Delegate(toStringRef(StorageBuf), Filenames,
CompilationDir);
return Delegate.readUncompressed(Version, NumFilenames);
}
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index 781a2901dbb9..db9be34d5248 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -46,11 +46,13 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) {
}
}
- SmallString<128> CompressedStr;
- bool doCompression =
- Compress && zlib::isAvailable() && DoInstrProfNameCompression;
+ SmallVector<uint8_t, 128> CompressedStr;
+ bool doCompression = Compress && compression::zlib::isAvailable() &&
+ DoInstrProfNameCompression;
if (doCompression)
- zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression);
+ compression::zlib::compress(arrayRefFromStringRef(FilenamesStr),
+ CompressedStr,
+ compression::zlib::BestSizeCompression);
// ::= <num-filenames>
// <uncompressed-len>
@@ -59,7 +61,7 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) {
encodeULEB128(Filenames.size(), OS);
encodeULEB128(FilenamesStr.size(), OS);
encodeULEB128(doCompression ? CompressedStr.size() : 0U, OS);
- OS << (doCompression ? CompressedStr.str() : StringRef(FilenamesStr));
+ OS << (doCompression ? toStringRef(CompressedStr) : StringRef(FilenamesStr));
}
namespace {
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 48ac5ce0d607..f8d7c4d36481 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -39,7 +39,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SwapByteOrder.h"
@@ -177,10 +176,9 @@ class InstrProfErrorCategoryType : public std::error_category {
} // end anonymous namespace
-static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory;
-
const std::error_category &llvm::instrprof_category() {
- return *ErrorCategory;
+ static InstrProfErrorCategoryType ErrorCategory;
+ return ErrorCategory;
}
namespace {
@@ -466,12 +464,13 @@ Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
return WriteStringToResult(0, UncompressedNameStrings);
}
- SmallString<128> CompressedNameStrings;
- zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
- zlib::BestSizeCompression);
+ SmallVector<uint8_t, 128> CompressedNameStrings;
+ compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings),
+ CompressedNameStrings,
+ compression::zlib::BestSizeCompression);
return WriteStringToResult(CompressedNameStrings.size(),
- CompressedNameStrings);
+ toStringRef(CompressedNameStrings));
}
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) {
@@ -488,7 +487,7 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
NameStrs.push_back(std::string(getPGOFuncNameVarInitializer(NameVar)));
}
return collectPGOFuncNameStrings(
- NameStrs, zlib::isAvailable() && doCompression, Result);
+ NameStrs, compression::zlib::isAvailable() && doCompression, Result);
}
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
@@ -501,23 +500,20 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
uint64_t CompressedSize = decodeULEB128(P, &N);
P += N;
bool isCompressed = (CompressedSize != 0);
- SmallString<128> UncompressedNameStrings;
+ SmallVector<uint8_t, 128> UncompressedNameStrings;
StringRef NameStrings;
if (isCompressed) {
- if (!llvm::zlib::isAvailable())
+ if (!llvm::compression::zlib::isAvailable())
return make_error<InstrProfError>(instrprof_error::zlib_unavailable);
- StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
- CompressedSize);
- if (Error E =
- zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
- UncompressedSize)) {
+ if (Error E = compression::zlib::uncompress(
+ makeArrayRef(P, CompressedSize), UncompressedNameStrings,
+ UncompressedSize)) {
consumeError(std::move(E));
return make_error<InstrProfError>(instrprof_error::uncompress_failed);
}
P += CompressedSize;
- NameStrings = StringRef(UncompressedNameStrings.data(),
- UncompressedNameStrings.size());
+ NameStrings = toStringRef(UncompressedNameStrings);
} else {
NameStrings =
StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index f794e64a13e7..b4d5550a1721 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <system_error>
@@ -98,10 +97,9 @@ class SampleProfErrorCategoryType : public std::error_category {
} // end anonymous namespace
-static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
-
const std::error_category &llvm::sampleprof_category() {
- return *ErrorCategory;
+ static SampleProfErrorCategoryType ErrorCategory;
+ return ErrorCategory;
}
void LineLocation::print(raw_ostream &OS) const {
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 280e3c6cb8d1..204e34bff879 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -877,15 +877,13 @@ std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
if (std::error_code EC = CompressSize.getError())
return EC;
- if (!llvm::zlib::isAvailable())
+ if (!llvm::compression::zlib::isAvailable())
return sampleprof_error::zlib_unavailable;
- StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
- *CompressSize);
- char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
+ uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
size_t UCSize = DecompressBufSize;
- llvm::Error E =
- zlib::uncompress(CompressedStrings, Buffer, UCSize);
+ llvm::Error E = compression::zlib::uncompress(
+ makeArrayRef(Data, *CompressSize), Buffer, UCSize);
if (E)
return sampleprof_error::uncompress_failed;
DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 8ec6b7ebc29e..093790afe2d6 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -78,19 +78,20 @@ SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type,
}
std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
- if (!llvm::zlib::isAvailable())
+ if (!llvm::compression::zlib::isAvailable())
return sampleprof_error::zlib_unavailable;
std::string &UncompressedStrings =
static_cast<raw_string_ostream *>(LocalBufStream.get())->str();
if (UncompressedStrings.size() == 0)
return sampleprof_error::success;
auto &OS = *OutputStream;
- SmallString<128> CompressedStrings;
- zlib::compress(UncompressedStrings, CompressedStrings,
- zlib::BestSizeCompression);
+ SmallVector<uint8_t, 128> CompressedStrings;
+ compression::zlib::compress(arrayRefFromStringRef(UncompressedStrings),
+ CompressedStrings,
+ compression::zlib::BestSizeCompression);
encodeULEB128(UncompressedStrings.size(), OS);
encodeULEB128(CompressedStrings.size(), OS);
- OS << CompressedStrings.str();
+ OS << toStringRef(CompressedStrings);
UncompressedStrings.clear();
return sampleprof_error::success;
}
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index eb6c04d987b3..e3df172ef113 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1862,8 +1862,10 @@ void basic_parser_impl::printOptionInfo(const Option &O,
outs() << " <" << getValueStr(O, ValName) << ">...";
} else if (O.getValueExpectedFlag() == ValueOptional)
outs() << "[=<" << getValueStr(O, ValName) << ">]";
- else
- outs() << "=<" << getValueStr(O, ValName) << '>';
+ else {
+ outs() << (O.ArgStr.size() == 1 ? " <" : "=<") << getValueStr(O, ValName)
+ << '>';
+ }
}
Option::printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O));
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index 983a6348bbe4..21191972fb8b 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -22,11 +22,9 @@
#endif
using namespace llvm;
+using namespace llvm::compression;
#if LLVM_ENABLE_ZLIB
-static Error createError(StringRef Err) {
- return make_error<StringError>(Err, inconvertibleErrorCode());
-}
static StringRef convertZlibCodeToString(int Code) {
switch (Code) {
@@ -46,63 +44,59 @@ static StringRef convertZlibCodeToString(int Code) {
bool zlib::isAvailable() { return true; }
-void zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer, int Level) {
- unsigned long CompressedSize = ::compressBound(InputBuffer.size());
+void zlib::compress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+ unsigned long CompressedSize = ::compressBound(Input.size());
CompressedBuffer.resize_for_overwrite(CompressedSize);
- int Res =
- ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size(), Level);
+ int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
+ (const Bytef *)Input.data(), Input.size(), Level);
if (Res == Z_MEM_ERROR)
report_bad_alloc_error("Allocation failed");
assert(Res == Z_OK);
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(CompressedBuffer.data(), CompressedSize);
- CompressedBuffer.truncate(CompressedSize);
+ if (CompressedSize < CompressedBuffer.size())
+ CompressedBuffer.truncate(CompressedSize);
}
-Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
+Error zlib::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
size_t &UncompressedSize) {
int Res =
::uncompress((Bytef *)UncompressedBuffer, (uLongf *)&UncompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size());
+ (const Bytef *)Input.data(), Input.size());
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(UncompressedBuffer, UncompressedSize);
- return Res ? createError(convertZlibCodeToString(Res)) : Error::success();
+ return Res ? make_error<StringError>(convertZlibCodeToString(Res),
+ inconvertibleErrorCode())
+ : Error::success();
}
-Error zlib::uncompress(StringRef InputBuffer,
- SmallVectorImpl<char> &UncompressedBuffer,
+Error zlib::uncompress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &UncompressedBuffer,
size_t UncompressedSize) {
UncompressedBuffer.resize_for_overwrite(UncompressedSize);
Error E =
- uncompress(InputBuffer, UncompressedBuffer.data(), UncompressedSize);
- UncompressedBuffer.truncate(UncompressedSize);
+ zlib::uncompress(Input, UncompressedBuffer.data(), UncompressedSize);
+ if (UncompressedSize < UncompressedBuffer.size())
+ UncompressedBuffer.truncate(UncompressedSize);
return E;
}
-uint32_t zlib::crc32(StringRef Buffer) {
- return ::crc32(0, (const Bytef *)Buffer.data(), Buffer.size());
-}
-
#else
bool zlib::isAvailable() { return false; }
-void zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer, int Level) {
+void zlib::compress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
llvm_unreachable("zlib::compress is unavailable");
}
-Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
+Error zlib::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
size_t &UncompressedSize) {
llvm_unreachable("zlib::uncompress is unavailable");
}
-Error zlib::uncompress(StringRef InputBuffer,
- SmallVectorImpl<char> &UncompressedBuffer,
+Error zlib::uncompress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &UncompressedBuffer,
size_t UncompressedSize) {
llvm_unreachable("zlib::uncompress is unavailable");
}
-uint32_t zlib::crc32(StringRef Buffer) {
- llvm_unreachable("zlib::crc32 is unavailable");
-}
#endif
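The zlib interface moves from StringRef/char buffers into the llvm::compression namespace with ArrayRef<uint8_t> payloads; arrayRefFromStringRef and toStringRef bridge the two representations at call sites, as the earlier hunks show. A round-trip sketch against the new signatures (only meaningful in a build with LLVM_ENABLE_ZLIB):

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include <cassert>
using namespace llvm;

int main() {
  StringRef Text = "hello hello hello"; // repetitive => compresses well
  SmallVector<uint8_t, 64> Compressed;
  compression::zlib::compress(arrayRefFromStringRef(Text), Compressed);

  SmallVector<uint8_t, 64> Decompressed;
  if (Error E = compression::zlib::uncompress(Compressed, Decompressed,
                                              Text.size())) {
    consumeError(std::move(E)); // decompression failed
    return 1;
  }
  assert(toStringRef(Decompressed) == Text); // round trip is lossless
  return 0;
}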
diff --git a/llvm/lib/Support/ConvertUTF.cpp b/llvm/lib/Support/ConvertUTF.cpp
index e24a918c5c89..5436f557b993 100644
--- a/llvm/lib/Support/ConvertUTF.cpp
+++ b/llvm/lib/Support/ConvertUTF.cpp
@@ -417,6 +417,16 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
return isLegalUTF8(source, length);
}
+/*
+ * Exported function to return the size of the first UTF-8 code unit sequence,
+ * or 0 if the sequence is not valid.
+ */
+unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd) {
+ int length = trailingBytesForUTF8[*source] + 1;
+ return (length <= sourceEnd - source && isLegalUTF8(source, length)) ? length
+ : 0;
+}
+
/* --------------------------------------------------------------------- */
static unsigned
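A usage sketch for the new helper, assuming the matching declaration lands in llvm/Support/ConvertUTF.h alongside the other exported entry points:

#include "llvm/Support/ConvertUTF.h"
#include <iostream>
using namespace llvm;

int main() {
  const UTF8 Euro[] = {0xE2, 0x82, 0xAC}; // U+20AC, a valid 3-byte sequence
  std::cout << getUTF8SequenceSize(Euro, Euro + 3) << '\n'; // 3
  const UTF8 Bad[] = {0xFF};              // 0xFF never starts a sequence
  std::cout << getUTF8SequenceSize(Bad, Bad + 1) << '\n';   // 0
}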
diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp
index 8bfc8ee7a8cc..fbe86f2b59e1 100644
--- a/llvm/lib/Support/Error.cpp
+++ b/llvm/lib/Support/Error.cpp
@@ -9,7 +9,6 @@
#include "llvm/Support/Error.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include <system_error>
using namespace llvm;
@@ -46,7 +45,10 @@ namespace {
}
-static ManagedStatic<ErrorErrorCategory> ErrorErrorCat;
+ErrorErrorCategory &getErrorErrorCat() {
+ static ErrorErrorCategory ErrorErrorCat;
+ return ErrorErrorCat;
+}
namespace llvm {
@@ -71,19 +73,19 @@ void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) {
std::error_code ErrorList::convertToErrorCode() const {
return std::error_code(static_cast<int>(ErrorErrorCode::MultipleErrors),
- *ErrorErrorCat);
+ getErrorErrorCat());
}
std::error_code inconvertibleErrorCode() {
return std::error_code(static_cast<int>(ErrorErrorCode::InconvertibleError),
- *ErrorErrorCat);
+ getErrorErrorCat());
}
std::error_code FileError::convertToErrorCode() const {
std::error_code NestedEC = Err->convertToErrorCode();
if (NestedEC == inconvertibleErrorCode())
return std::error_code(static_cast<int>(ErrorErrorCode::FileError),
- *ErrorErrorCat);
+ getErrorErrorCat());
return NestedEC;
}
diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp
index cf3962ae927b..5476becc2945 100644
--- a/llvm/lib/Support/Process.cpp
+++ b/llvm/lib/Support/Process.cpp
@@ -47,7 +47,7 @@ Optional<std::string> Process::FindInEnvPath(StringRef EnvName,
const char EnvPathSeparatorStr[] = {Separator, '\0'};
SmallVector<StringRef, 8> Dirs;
- SplitString(OptPath.getValue(), Dirs, EnvPathSeparatorStr);
+ SplitString(OptPath.value(), Dirs, EnvPathSeparatorStr);
for (StringRef Dir : Dirs) {
if (Dir.empty())
diff --git a/llvm/lib/Support/Unicode.cpp b/llvm/lib/Support/Unicode.cpp
index 103710303094..d4d7e75b739d 100644
--- a/llvm/lib/Support/Unicode.cpp
+++ b/llvm/lib/Support/Unicode.cpp
@@ -269,7 +269,7 @@ bool isPrintable(int UCS) {
}
/// Unicode code points of the Cf category are considered
-/// fornatting characters.
+/// formatting characters.
bool isFormatting(int UCS) {
// https://unicode.org/Public/14.0.0/ucdxml/
diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc
index 3c2d118977c5..c1959b5cc2ae 100644
--- a/llvm/lib/Support/Unix/Process.inc
+++ b/llvm/lib/Support/Unix/Process.inc
@@ -14,7 +14,6 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/ManagedStatic.h"
#include <mutex>
#if HAVE_FCNTL_H
#include <fcntl.h>
@@ -327,10 +326,6 @@ extern "C" int del_curterm(struct term *termp);
extern "C" int tigetnum(char *capname);
#endif
-#ifdef LLVM_ENABLE_TERMINFO
-static ManagedStatic<std::mutex> TermColorMutex;
-#endif
-
bool checkTerminalEnvironmentForColors() {
if (const char *TermStr = std::getenv("TERM")) {
return StringSwitch<bool>(TermStr)
@@ -351,7 +346,8 @@ bool checkTerminalEnvironmentForColors() {
static bool terminalHasColors(int fd) {
#ifdef LLVM_ENABLE_TERMINFO
// First, acquire a global lock because these C routines are thread hostile.
- std::lock_guard<std::mutex> G(*TermColorMutex);
+ static std::mutex TermColorMutex;
+ std::lock_guard<std::mutex> G(TermColorMutex);
struct term *previous_term = set_curterm(nullptr);
int errret = 0;
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 21f0c39bfd6e..97d63fff1069 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -2669,13 +2669,13 @@ void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries,
" 'version': 0,\n";
if (IsCaseSensitive)
OS << " 'case-sensitive': '"
- << (IsCaseSensitive.getValue() ? "true" : "false") << "',\n";
+ << (IsCaseSensitive.value() ? "true" : "false") << "',\n";
if (UseExternalNames)
OS << " 'use-external-names': '"
- << (UseExternalNames.getValue() ? "true" : "false") << "',\n";
+ << (UseExternalNames.value() ? "true" : "false") << "',\n";
bool UseOverlayRelative = false;
if (IsOverlayRelative) {
- UseOverlayRelative = IsOverlayRelative.getValue();
+ UseOverlayRelative = IsOverlayRelative.value();
OS << " 'overlay-relative': '" << (UseOverlayRelative ? "true" : "false")
<< "',\n";
}
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 32477de5184b..1621f4a54b79 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -731,6 +731,11 @@ static bool GetDumpType(HKEY Key, MINIDUMP_TYPE &ResultType) {
/// otherwise.
static std::error_code WINAPI
WriteWindowsDumpFile(PMINIDUMP_EXCEPTION_INFORMATION ExceptionInfo) {
+ struct ScopedCriticalSection {
+ ScopedCriticalSection() { EnterCriticalSection(&CriticalSection); }
+ ~ScopedCriticalSection() { LeaveCriticalSection(&CriticalSection); }
+ } SCS;
+
using namespace llvm;
using namespace llvm::sys;
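
The ScopedCriticalSection struct above is the usual RAII guard: acquisition happens in the constructor and release in the destructor, so every exit path out of WriteWindowsDumpFile, including early returns, leaves the critical section. A generic sketch of the same idea, with hypothetical names and the guarded CRITICAL_SECTION passed in explicitly rather than captured from file scope:

#include <windows.h>

// RAII wrapper around a Win32 CRITICAL_SECTION: enters on construction,
// leaves on destruction, and is non-copyable so the lock cannot escape.
class ScopedLock {
  CRITICAL_SECTION &CS;
public:
  explicit ScopedLock(CRITICAL_SECTION &C) : CS(C) { EnterCriticalSection(&CS); }
  ~ScopedLock() { LeaveCriticalSection(&CS); }
  ScopedLock(const ScopedLock &) = delete;
  ScopedLock &operator=(const ScopedLock &) = delete;
};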
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 10f9692d217e..2567f3ed8034 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -285,8 +285,9 @@ constexpr FeatureBitset FeaturesZNVER1 =
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
FeatureXSAVEOPT | FeatureXSAVES;
-constexpr FeatureBitset FeaturesZNVER2 =
- FeaturesZNVER1 | FeatureCLWB | FeatureRDPID | FeatureWBNOINVD;
+constexpr FeatureBitset FeaturesZNVER2 = FeaturesZNVER1 | FeatureCLWB |
+ FeatureRDPID | FeatureRDPRU |
+ FeatureWBNOINVD;
static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
FeatureINVPCID | FeaturePKU |
FeatureVAES | FeatureVPCLMULQDQ;
@@ -490,6 +491,7 @@ constexpr FeatureBitset ImpliedFeaturesPREFETCHWT1 = {};
constexpr FeatureBitset ImpliedFeaturesPRFCHW = {};
constexpr FeatureBitset ImpliedFeaturesPTWRITE = {};
constexpr FeatureBitset ImpliedFeaturesRDPID = {};
+constexpr FeatureBitset ImpliedFeaturesRDPRU = {};
constexpr FeatureBitset ImpliedFeaturesRDRND = {};
constexpr FeatureBitset ImpliedFeaturesRDSEED = {};
constexpr FeatureBitset ImpliedFeaturesRTM = {};
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 98ceea3c3c7a..651949ad5765 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -429,7 +429,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) {
indent(FB.IndentLevel);
if (FB.FirstByteOffset) {
- uint64_t Offset = FB.FirstByteOffset.getValue();
+ uint64_t Offset = FB.FirstByteOffset.value();
llvm::write_hex(*this, Offset + LineIndex, HPS, OffsetWidth);
*this << ": ";
}
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 6c205104d569..75a99e95541a 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -2601,7 +2601,7 @@ StringRef Record::getValueAsString(StringRef FieldName) const {
if (!S)
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
- return S.getValue();
+ return S.value();
}
llvm::Optional<StringRef>
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index f092c039b58e..b332e9dcb176 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -650,6 +650,7 @@ include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
include "AArch64SchedTSV110.td"
include "AArch64SchedAmpere1.td"
+include "AArch64SchedNeoverseN2.td"
def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors">;
@@ -1137,7 +1138,7 @@ def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78,
[TuneA78]>;
def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C,
[TuneA78C]>;
-def : ProcessorModel<"cortex-a710", CortexA57Model, ProcessorFeatures.A710,
+def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710,
[TuneA710]>;
def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
[TuneR82]>;
@@ -1145,17 +1146,17 @@ def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
[TuneX1]>;
def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C,
[TuneX1]>;
-def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2,
+def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", CortexA57Model,
ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>;
-def : ProcessorModel<"neoverse-n2", CortexA57Model,
+def : ProcessorModel<"neoverse-n2", NeoverseN2Model,
ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>;
-def : ProcessorModel<"neoverse-512tvb", CortexA57Model,
+def : ProcessorModel<"neoverse-512tvb", NeoverseN2Model,
ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>;
-def : ProcessorModel<"neoverse-v1", CortexA57Model,
+def : ProcessorModel<"neoverse-v1", NeoverseN2Model,
ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3,
[TuneExynosM3]>;
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index ef4860979dd3..c568f73471e1 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1173,6 +1173,8 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
#include "AArch64GenMCPseudoLowering.inc"
void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ AArch64_MC::verifyInstructionPredicates(MI->getOpcode(), STI->getFeatureBits());
+
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index abfe2d507111..447ad10ddf22 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -237,6 +237,39 @@ static bool isMergePassthruOpcode(unsigned Opc) {
}
}
+// Returns true if inactive lanes are known to be zeroed by construction.
+static bool isZeroingInactiveLanes(SDValue Op) {
+ switch (Op.getOpcode()) {
+ default:
+ // We guarantee that i1 splat_vectors zero the other lanes by
+ // implementing them with ptrue and possibly a punpklo for nxv1i1.
+ if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
+ return true;
+ return false;
+ case AArch64ISD::PTRUE:
+ case AArch64ISD::SETCC_MERGE_ZERO:
+ return true;
+ case ISD::INTRINSIC_WO_CHAIN:
+ switch (Op.getConstantOperandVal(0)) {
+ default:
+ return false;
+ case Intrinsic::aarch64_sve_ptrue:
+ case Intrinsic::aarch64_sve_pnext:
+ case Intrinsic::aarch64_sve_cmpeq_wide:
+ case Intrinsic::aarch64_sve_cmpne_wide:
+ case Intrinsic::aarch64_sve_cmpge_wide:
+ case Intrinsic::aarch64_sve_cmpgt_wide:
+ case Intrinsic::aarch64_sve_cmplt_wide:
+ case Intrinsic::aarch64_sve_cmple_wide:
+ case Intrinsic::aarch64_sve_cmphs_wide:
+ case Intrinsic::aarch64_sve_cmphi_wide:
+ case Intrinsic::aarch64_sve_cmplo_wide:
+ case Intrinsic::aarch64_sve_cmpls_wide:
+ return true;
+ }
+ }
+}
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -1082,6 +1115,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
}
+ // FIXME: Move lowering for more nodes here if those are common between
+ // SVE and SME.
+ if (Subtarget->hasSVE() || Subtarget->hasSME()) {
+ for (auto VT :
+ {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+ }
+
if (Subtarget->hasSVE()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -1162,14 +1205,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
@@ -2429,6 +2470,23 @@ AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
return BB;
}
+MachineBasicBlock *
+AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+ MIB.add(MI.getOperand(1)); // pn
+ MIB.add(MI.getOperand(2)); // pm
+ MIB.add(MI.getOperand(3)); // zn
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
@@ -2561,6 +2619,14 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
BB);
case AArch64::ZERO_M_PSEUDO:
return EmitZero(MI, BB);
+ case AArch64::ADDHA_MPPZ_PSEUDO_S:
+ return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::ADDVA_MPPZ_PSEUDO_S:
+ return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::ADDHA_MPPZ_PSEUDO_D:
+ return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::ADDVA_MPPZ_PSEUDO_D:
+ return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB);
}
}
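
Reading the emitter and the switch together: the pseudo carries the tile number as an immediate, and the real tile register is computed as BaseReg plus that immediate, with the destination tied to the source. Assuming the ZA tile registers are numbered consecutively, as the BaseReg + imm arithmetic implies, a 32-bit horizontal add with tile immediate 2 would expand roughly as:

// Illustrative expansion (not output copied from the patch):
//   ADDHA_MPPZ_PSEUDO_S 2, %pn, %pm, %zn
// becomes
//   ZAS2 = ADDHA_MPPZ_S ZAS2, %pn, %pm, %zn   // ZAS2 == AArch64::ZAS0 + 2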
@@ -4329,55 +4395,49 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
+ if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
+ return DAG.getConstant(1, DL, MVT::nxv1i1);
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
-static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
+// Returns a safe bitcast between two scalable vector predicates, where
+// any newly created lanes from a widening bitcast are defined as zero.
+static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- EVT OutVT = Op.getValueType();
- SDValue InOp = Op.getOperand(1);
- EVT InVT = InOp.getValueType();
+ EVT InVT = Op.getValueType();
+
+ assert(InVT.getVectorElementType() == MVT::i1 &&
+ VT.getVectorElementType() == MVT::i1 &&
+ "Expected a predicate-to-predicate bitcast");
+ assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ InVT.isScalableVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(InVT) &&
+ "Only expect to cast between legal scalable predicate types!");
// Return the operand if the cast isn't changing type,
- // i.e. <n x 16 x i1> -> <n x 16 x i1>
- if (InVT == OutVT)
- return InOp;
+ // e.g. <n x 16 x i1> -> <n x 16 x i1>
+ if (InVT == VT)
+ return Op;
- SDValue Reinterpret =
- DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
+ SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
- // If the argument converted to an svbool is a ptrue or a comparison, the
- // lanes introduced by the widening are zero by construction.
- switch (InOp.getOpcode()) {
- case AArch64ISD::SETCC_MERGE_ZERO:
+ // We only have to zero the lanes if new lanes are being defined, e.g. when
+ // casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the
+ // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
+ // we can return here.
+ if (InVT.bitsGT(VT))
return Reinterpret;
- case ISD::INTRINSIC_WO_CHAIN:
- switch (InOp.getConstantOperandVal(0)) {
- case Intrinsic::aarch64_sve_ptrue:
- case Intrinsic::aarch64_sve_cmpeq_wide:
- case Intrinsic::aarch64_sve_cmpne_wide:
- case Intrinsic::aarch64_sve_cmpge_wide:
- case Intrinsic::aarch64_sve_cmpgt_wide:
- case Intrinsic::aarch64_sve_cmplt_wide:
- case Intrinsic::aarch64_sve_cmple_wide:
- case Intrinsic::aarch64_sve_cmphs_wide:
- case Intrinsic::aarch64_sve_cmphi_wide:
- case Intrinsic::aarch64_sve_cmplo_wide:
- case Intrinsic::aarch64_sve_cmpls_wide:
- return Reinterpret;
- }
- }
- // Splat vectors of one will generate ptrue instructions
- if (ISD::isConstantSplatVectorAllOnes(InOp.getNode()))
+ // Check if the other lanes are already known to be zeroed by
+ // construction.
+ if (isZeroingInactiveLanes(Op))
return Reinterpret;
- // Otherwise, zero the newly introduced lanes.
- SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
- SDValue MaskReinterpret =
- DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
- return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
+ // Zero the newly introduced lanes.
+ SDValue Mask = DAG.getConstant(1, DL, InVT);
+ Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask);
+ return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask);
}
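
A scalar model may help explain the final AND. An SVE predicate register carries one bit per byte of vector data, and only the lowest bit of each element-sized field is defined; reinterpreting a coarse predicate (e.g. nxv2i1) as a fine one (nxv16i1) therefore exposes bits that were previously don't-care. A minimal sketch, modeling one 128-bit granule with a 16-bit mask (illustrative only, not DAG code):

#include <cstdint>

// Model of widening nxv2i1 -> nxv16i1 over one 128-bit granule: an nxv2i1
// value only defines bits 0 and 8 (one per 8-byte element), so the newly
// exposed lanes must be cleared. This is what the REINTERPRET_CAST of the
// all-ones nxv2i1 constant followed by the AND achieves above.
uint16_t widenNxv2ToNxv16(uint16_t RawPredBits) {
  const uint16_t Nxv2LaneMask = 0x0101; // defined lanes of an nxv2i1
  return RawPredBits & Nxv2LaneMask;    // zero the newly introduced lanes
}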
SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
@@ -4546,10 +4606,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
- return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
- Op.getOperand(1));
+ return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_convert_to_svbool:
- return lowerConvertToSVBool(Op, DAG);
+ return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -6393,9 +6452,8 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
if (SizeInBits < 8)
return false;
- APInt LowBits(SizeInBits, 0xFF);
APInt RequredZero(SizeInBits, 0xFE);
- KnownBits Bits = DAG.computeKnownBits(Arg, LowBits, 4);
+ KnownBits Bits = DAG.computeKnownBits(Arg, 4);
bool ZExtBool = (Bits.Zero & RequredZero) == RequredZero;
return ZExtBool;
}
@@ -14814,16 +14872,6 @@ static SDValue performANDCombine(SDNode *N,
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
- // Although NEON has no EORV instruction, when only the least significant bit
- // is required the operation is synonymous with ADDV.
- if (LHS.getOpcode() == ISD::VECREDUCE_XOR && isOneConstant(RHS) &&
- LHS.getOperand(0).getValueType().isFixedLengthVector() &&
- LHS.hasOneUse()) {
- SDLoc DL(N);
- SDValue ADDV = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, LHS.getOperand(0));
- return DAG.getNode(ISD::AND, DL, VT, ADDV, RHS);
- }
-
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
@@ -16126,12 +16174,24 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
assert(Op.getValueType().isScalableVector() &&
TLI.isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
+ assert(Op.getValueType() == Pg.getValueType() &&
+ "Expected same type for PTEST operands");
// Ensure target specific opcodes are using legal type.
EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue TVal = DAG.getConstant(1, DL, OutVT);
SDValue FVal = DAG.getConstant(0, DL, OutVT);
+ // Ensure operands have type nxv16i1.
+ if (Op.getValueType() != MVT::nxv16i1) {
+ if ((Cond == AArch64CC::ANY_ACTIVE || Cond == AArch64CC::NONE_ACTIVE) &&
+ isZeroingInactiveLanes(Op))
+ Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Pg);
+ else
+ Pg = getSVEPredicateBitCast(MVT::nxv16i1, Pg, DAG);
+ Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Op);
+ }
+
// Set condition code (CC) flags.
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
@@ -18026,6 +18086,54 @@ static SDValue performCSELCombine(SDNode *N,
return performCONDCombine(N, DCI, DAG, 2, 3);
}
+// Try to re-use an already extended operand of a vector SetCC feeding an
+// extended select. Doing so avoids requiring another full extension of the
+// SET_CC result when lowering the select.
+static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
+ EVT Op0MVT = Op->getOperand(0).getValueType();
+ if (!Op0MVT.isVector() || Op->use_empty())
+ return SDValue();
+
+ // Make sure that all uses of Op are VSELECTs with matching result types where
+ // the result type has a larger element type than the SetCC operand.
+ SDNode *FirstUse = *Op->use_begin();
+ if (FirstUse->getOpcode() != ISD::VSELECT)
+ return SDValue();
+ EVT UseMVT = FirstUse->getValueType(0);
+ if (UseMVT.getScalarSizeInBits() <= Op0MVT.getScalarSizeInBits())
+ return SDValue();
+ if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
+ return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
+ }))
+ return SDValue();
+
+ APInt V;
+ if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue Op0ExtV;
+ SDValue Op1ExtV;
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
+ // Check if the first operand of the SET_CC is already extended. If it is,
+ // split the SET_CC and re-use the extended version of the operand.
+ SDNode *Op0SExt = DAG.getNodeIfExists(ISD::SIGN_EXTEND, DAG.getVTList(UseMVT),
+ Op->getOperand(0));
+ SDNode *Op0ZExt = DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(UseMVT),
+ Op->getOperand(0));
+ if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
+ Op0ExtV = SDValue(Op0SExt, 0);
+ Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
+ } else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
+ Op0ExtV = SDValue(Op0ZExt, 0);
+ Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
+ } else
+ return SDValue();
+
+ return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),
+ Op0ExtV, Op1ExtV, Op->getOperand(2));
+}
+
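
Roughly, the combine rewrites the following shape, shown here as LLVM-IR-flavored pseudocode (illustrative only). The key point is that the wide sign_extend of %x must already exist in the DAG, so the rewrite reuses it instead of extending the i1 setcc result later:

// Before: every use of %c is a vselect at the wider type, so lowering
// would need another full extension of the setcc result:
//   %c = setcc slt <4 x i16> %x, splat(7)
//   %r = vselect %c, <4 x i32> %a, <4 x i32> %b
// After, given a pre-existing %xe = sign_extend %x to <4 x i32>:
//   %c = setcc slt <4 x i32> %xe, splat(7)
//   %r = vselect %c, <4 x i32> %a, <4 x i32> %b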
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
@@ -18034,6 +18142,9 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
+ if (SDValue V = tryToWidenSetCCOperands(N, DAG))
+ return V;
+
// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
@@ -21045,7 +21156,7 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
default:
return SDValue();
case ISD::VECREDUCE_OR:
- if (isAllActivePredicate(DAG, Pg))
+ if (isAllActivePredicate(DAG, Pg) && OpVT == MVT::nxv16i1)
// The predicate can be 'Op' because
// vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
@@ -21058,6 +21169,11 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
case ISD::VECREDUCE_XOR: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
+ if (OpVT == MVT::nxv1i1) {
+ // Emulate a CNTP on .Q using .D and a different governing predicate.
+ Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Pg);
+ Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Op);
+ }
SDValue Cntp =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
@@ -21464,22 +21580,17 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- (void)TLI;
- assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
- InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
+ assert(VT.isScalableVector() && isTypeLegal(VT) &&
+ InVT.isScalableVector() && isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
- assert((VT.getVectorElementType() == MVT::i1) ==
- (InVT.getVectorElementType() == MVT::i1) &&
- "Cannot cast between data and predicate scalable vector types!");
+ assert(VT.getVectorElementType() != MVT::i1 &&
+ InVT.getVectorElementType() != MVT::i1 &&
+ "For predicate bitcasts, use getSVEPredicateBitCast");
if (InVT == VT)
return Op;
- if (VT.getVectorElementType() == MVT::i1)
- return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
-
EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 06ea918ea32e..e02b5e56fd2e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -571,6 +571,9 @@ public:
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -1148,6 +1151,7 @@ private:
// These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
// to transition between unpacked and packed types of the same element type,
// with BITCAST used otherwise.
+ // This function does not handle predicate bitcasts.
SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index c477a44b13b2..6839e73796a6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -29,21 +29,21 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
// An atomic load operation that does not need either acquire or release
// semantics.
-class relaxed_load<PatFrag base>
+class relaxed_load<PatFrags base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
let IsAtomicOrderingAcquireOrStronger = 0;
}
// An atomic load operation that actually needs acquire semantics.
-class acquiring_load<PatFrag base>
+class acquiring_load<PatFrags base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
let IsAtomicOrderingAcquire = 1;
}
// An atomic load operation that needs sequential consistency.
-class seq_cst_load<PatFrag base>
+class seq_cst_load<PatFrags base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
let IsAtomicOrderingSequentiallyConsistent = 1;
@@ -63,34 +63,34 @@ let Predicates = [HasLDAPR] in {
}
// 8-bit loads
-def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+def : Pat<(seq_cst_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(acquiring_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_az_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend8:$offset)),
(LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend8:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend8:$offset)),
(LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
- uimm12s1:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_8> (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8>
+def : Pat<(relaxed_load<atomic_load_az_8>
(am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
// 16-bit loads
-def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+def : Pat<(seq_cst_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(acquiring_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_az_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)),
(LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
-def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)),
+def : Pat<(relaxed_load<atomic_load_az_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
(LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
-def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
- uimm12s2:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_16> (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)),
(LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat<(relaxed_load<atomic_load_16>
+def : Pat<(relaxed_load<atomic_load_az_16>
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 78bc1b8c6f02..02fa36a1df4b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1505,7 +1505,7 @@ class CRmSystemI<Operand crmtype, bits<3> opc, string asm,
class SystemNoOperands<bits<3> op2, string asm, list<dag> pattern = []>
: SimpleSystemI<0, (ins), asm, "", pattern>,
- Sched<[]> {
+ Sched<[WriteHint]> {
bits<4> CRm;
let CRm = 0b0011;
let Inst{31-12} = 0b11010101000000110010;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 3802a45ad6c1..d444223e4494 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4356,10 +4356,12 @@ defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
+ let Predicates = [HasFullFP16] in {
def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
+ }
def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 68ff1b78e84b..c66f9cfd9c22 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -778,7 +778,7 @@ let Predicates = [HasSVEorSME] in {
defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>;
defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
- def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
+ def PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest>;
defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
@@ -1531,6 +1531,14 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
(PUNPKHI_PP PPR:$Ps)>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 1))),
+ (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
+ (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 3))),
+ (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
(PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
@@ -1539,7 +1547,6 @@ let Predicates = [HasSVEorSME] in {
(PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
(PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
-
def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
(PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
@@ -1549,6 +1556,23 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
(PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 1))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 3))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 5))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 7))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
(PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
@@ -1566,6 +1590,39 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
(PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 1))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 3))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 5))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 7))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 9))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 11))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 13))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 15))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>;
+
// Extract subvectors from FP SVE vectors
def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
(UUNPKLO_ZZ_D ZPR:$Zs)>;
@@ -2074,15 +2131,6 @@ let Predicates = [HasSVEorSME] in {
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
- def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
- (PTEST_PP PPR:$pg, PPR:$src)>;
- def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
- (PTEST_PP PPR:$pg, PPR:$src)>;
- def : Pat<(AArch64ptest (nxv4i1 PPR:$pg), (nxv4i1 PPR:$src)),
- (PTEST_PP PPR:$pg, PPR:$src)>;
- def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)),
- (PTEST_PP PPR:$pg, PPR:$src)>;
-
let AddedComplexity = 1 in {
class LD1RPat<ValueType vt, SDPatternOperator operator,
Instruction load, Instruction ptrue, ValueType index_vt, ComplexPattern CP, Operand immtype> :
@@ -2347,6 +2395,9 @@ let Predicates = [HasSVEorSME] in {
(AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>;
def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)),
(AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;
+ // Emulate .Q operation using a PTRUE_D when the other lanes don't matter.
+ def : Pat<(nxv1i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;
// Add more complex addressing modes here as required
multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td
index d18a05fda191..e378b043d37e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -28,7 +28,8 @@ def CortexA53Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index c6b112d0d2f1..141cc6b79c8b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -29,7 +29,7 @@ def CortexA55Model : SchedMachineModel {
let PostRAScheduler = 1; // Enable PostRA scheduler pass.
let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = [HasSVE, HasMTE];
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td
index a860aa907fd1..8ce229374000 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -33,7 +33,8 @@ def CortexA57Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
}
//===----------------------------------------------------------------------===//
@@ -459,9 +460,9 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCM
// ASIMD FP convert, long and narrow
def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>;
// ASIMD FP convert, other, D-form
-def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[A57Write_5cyc_1V], (instregex "^[FSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
// ASIMD FP convert, other, Q-form
-def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+def : InstRW<[A57Write_5cyc_2V], (instregex "^[FSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP divide, D-form, F32
def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index 6b053f1969b4..4c65b6727d93 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -18,11 +18,11 @@ def A64FXModel : SchedMachineModel {
// Determined via a mix of micro-arch details and experimentation.
let LoopMicroOpBufferSize = 128;
let PostRAScheduler = 1; // Using PostRA sched.
- let CompleteModel = 1;
+ let CompleteModel = 0;
list<Predicate> UnsupportedFeatures =
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
- HasSVE2orSME];
+ HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16];
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
index 32f7299fbf87..b8d5a70d7ec6 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
@@ -25,7 +25,9 @@ def Ampere1Model : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ PAUnsupported.F,
+ [HasMTE]);
}
let SchedModel = Ampere1Model in {
diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index 9fbb46919427..e2d916954060 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -20,7 +20,8 @@ def CycloneModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index d66efb82fccc..f2863f5a8e3b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -26,7 +26,8 @@ def ExynosM3Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index 94e70793e855..ab1e680f9e99 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -26,7 +26,8 @@ def ExynosM4Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index 1db5f5322a64..ae0b2b3eaeb6 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -26,7 +26,8 @@ def ExynosM5Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
index 7c9b0afdd169..a765cd1cdfe3 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -25,7 +25,8 @@ def FalkorModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
index cc568a2f2f17..3551066ee7c3 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
@@ -29,7 +29,8 @@ def KryoModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
new file mode 100644
index 000000000000..eb5b971d66e5
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -0,0 +1,2279 @@
+//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Arm Neoverse N2 processors.
+//
+//===----------------------------------------------------------------------===//
+
+def NeoverseN2Model : SchedMachineModel {
+ let IssueWidth = 10; // Micro-ops dispatched at a time.
+ let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 10; // Extra cycles for mispredicted branch.
+ let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = SMEUnsupported.F;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Neoverse N2.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage. Once dispatched, micro-ops wait for their operands
+// and issue out-of-order to one of thirteen issue pipelines. Each issue
+// pipeline can accept one micro-op per cycle.
+
+let SchedModel = NeoverseN2Model in {
+
+// Define the (13) issue ports.
+def N2UnitB : ProcResource<2>; // Branch 0/1
+def N2UnitS : ProcResource<2>; // Integer single cycle 0/1
+def N2UnitM0 : ProcResource<1>; // Integer multicycle 0
+def N2UnitM1 : ProcResource<1>; // Integer multicycle 1
+def N2UnitL01 : ProcResource<2>; // Load/Store 0/1
+def N2UnitL2 : ProcResource<1>; // Load 2
+def N2UnitD : ProcResource<2>; // Store data 0/1
+def N2UnitV0 : ProcResource<1>; // FP/ASIMD 0
+def N2UnitV1 : ProcResource<1>; // FP/ASIMD 1
+
+def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>; // FP/ASIMD 0/1
+def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>; // Integer single/multicycle 0/1
+def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2
+def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Neoverse N2.
+
+//===----------------------------------------------------------------------===//
+// Define generic 1 micro-op types
+
+def N2Write_1cyc_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; }
+def N2Write_1cyc_1I : SchedWriteRes<[N2UnitI]> { let Latency = 1; }
+def N2Write_1cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 1; }
+def N2Write_1cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 1; }
+def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
+def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 2; }
+def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 3; }
+def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 3;
+ let ResourceCycles = [3]; }
+def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 5;
+ let ResourceCycles = [5]; }
+def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 12;
+ let ResourceCycles = [12]; }
+def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; }
+def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; }
+def N2Write_2cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 2; }
+def N2Write_3cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 3; }
+def N2Write_4cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 4; }
+def N2Write_5cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 5; }
+def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 12; }
+def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; }
+def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; }
+def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; }
+def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 7;
+ let ResourceCycles = [7]; }
+def N2Write_9cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 9; }
+def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; }
+def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; }
+def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 13; }
+def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 15; }
+def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 16; }
+def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 20; }
+def N2Write_2cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 2; }
+def N2Write_3cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 3; }
+def N2Write_4cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 4; }
+def N2Write_6cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 6; }
+def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 10; }
+def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; }
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types
+
+def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [5, 5];
+}
+
+def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
+ let Latency = 13;
+ let NumMicroOps = 2;
+ let ResourceCycles = [6, 7];
+}
+
+def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+ let ResourceCycles = [7, 8];
+}
+
+def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
+ let Latency = 16;
+ let NumMicroOps = 2;
+ let ResourceCycles = [8, 8];
+}
+
+def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 3 micro-op types
+
+def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 3;
+}
+
+def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 4 micro-op types
+
+def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
+ N2UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
+ N2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
+ N2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
+ N2UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0,
+ N2UnitV0]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
+ N2UnitV1]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+
+def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
+ N2UnitV1]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
+ N2UnitV1]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1,
+ N2UnitV1]> {
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+
+def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+
+def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM,
+ N2UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 5 micro-op types
+
+def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
+ N2UnitI, N2UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 5;
+}
+
+def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
+ N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 6 micro-op types
+
+def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
+ N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 6;
+}
+
+def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+}
+
+def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
+ N2UnitS, N2UnitS]> {
+ let Latency = 10;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 7 micro-op types
+
+def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
+ N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 8 micro-op types
+
+def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+}
+
+def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 8;
+}
+
+def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+}
+
+def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
+ N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
+ N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 10 micro-op types
+
+def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitL01, N2UnitV,
+ N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 10;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 12 micro-op types
+
+def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitV, N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV, N2UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 12;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 15 micro-op types
+
+def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitL01, N2UnitS,
+ N2UnitS, N2UnitS, N2UnitS,
+ N2UnitS, N2UnitV, N2UnitV,
+ N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 15;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 18 micro-op types
+
+def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 18;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 27 micro-op types
+
+def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitL01, N2UnitL01, N2UnitL01,
+ N2UnitS, N2UnitS, N2UnitS,
+ N2UnitS, N2UnitS, N2UnitS,
+ N2UnitS, N2UnitS, N2UnitS,
+ N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV, N2UnitV, N2UnitV,
+ N2UnitV, N2UnitV, N2UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 27;
+}
+
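+// Note on naming: the generic writes above follow the pattern
+// N2Write_<latency>cyc_<repeat><unit>..., encoding both the latency and the
+// pipeline units reserved. As a purely illustrative (hypothetical) example,
+// a 2 micro-op write with 3-cycle latency occupying one load port and one
+// vector port would be written:
+//
+//   def N2Write_3cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
+//     let Latency = 3;
+//     let NumMicroOps = 2;
+//   }
+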
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr, N2Write_1cyc_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+// ALU, basic, flagset
+def : SchedAlias<WriteI, N2Write_1cyc_1I>;
+
+// ALU, extend and shift
+def : SchedAlias<WriteISReg, N2Write_2cyc_1M>;
+def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;
+
+// Arithmetic, immediate to logical address tag
+def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;
+
+// Convert floating-point condition flags
+// Flag manipulation instructions
+def : WriteRes<WriteSys, []> { let Latency = 1; }
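+// (An empty resource list here means these writes retire with 1-cycle
+// latency without reserving any issue port.)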
+
+// Insert Random Tags
+def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>;
+
+// Insert Tag Mask
+// Subtract Pointer
+// Subtract Pointer, flagset
+def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
+
+// Move and shift instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteImm, N2Write_1cyc_1I>;
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+// SDIV, UDIV
+def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
+def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;
+
+def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; }
+def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; }
+
+// Multiply high
+def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
+
+// Pointer Authentication Instructions (v8.3 PAC)
+// -----------------------------------------------------------------------------
+
+// Authenticate data address
+// Authenticate instruction address
+// Compute pointer authentication code for data address
+// Compute pointer authentication code, using generic key
+// Compute pointer authentication code for instruction address
+def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
+
+// Branch and link, register, with pointer authentication
+// Branch, register, with pointer authentication
+// Branch, return, with pointer authentication
+def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
+ BRAAZ, BRAB, BRABZ, RETAA, RETAB,
+ ERETAA, ERETAB)>;
+
+// Load register, with pointer authentication
+def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
+
+// Strip pointer authentication code
+def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
+
+// Miscellaneous data-processing instructions
+// -----------------------------------------------------------------------------
+
+// Bitfield extract, one reg
+// Bitfield extract, two regs
+// NOTE: We don't model the difference between the form of EXTR where both
+// source operands are the same register (one reg) and the two-register form.
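+// (EXTR with both source operands equal is how the ROR-immediate alias is
+// encoded, e.g. "ror x0, x1, #8" == "extr x0, x1, x1, #8".)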
+def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
+def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : SchedAlias<WriteIS, N2Write_1cyc_1I>;
+
+// Bitfield move, insert
+def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
+
+// Load instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteLD, N2Write_4cyc_1L>;
+def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;
+
+// Load pair, signed immed offset, signed words
+def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>;
+// Load pair, immed post-index or immed pre-index, signed words
+def : InstRW<[N2Write_5cyc_1M0, WriteLDHi, WriteAdr],
+ (instregex "^LDPSW(post|pre)$")>;
+
+// Store instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>;
+def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
+def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>;
+def : SchedAlias<WriteAdr, N2Write_1cyc_1I>; // copied from A57.
+
+// Tag load instructions
+// -----------------------------------------------------------------------------
+
+// Load allocation tag
+// Load multiple allocation tags
+def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>;
+
+// Tag store instructions
+// -----------------------------------------------------------------------------
+
+// Store allocation tags to one or two granules, post-index
+// Store allocation tags to one or two granules, pre-index
+// Store allocation tag to one or two granules, zeroing, post-index
+// Store allocation tag to one or two granules, zeroing, pre-index
+// Store allocation tag and reg pair to memory, post-Index
+// Store allocation tag and reg pair to memory, pre-Index
+def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
+ ST2GPreIndex, ST2GPostIndex,
+ STZGPreIndex, STZGPostIndex,
+ STZ2GPreIndex, STZ2GPostIndex,
+ STGPpre, STGPpost)>;
+
+// Store allocation tags to one or two granules, signed offset
+// Store allocation tag to one or two granules, zeroing, signed offset
+// Store allocation tag and reg pair to memory, signed offset
+// Store multiple allocation tags
+def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGOffset, ST2GOffset, STZGOffset,
+ STZ2GOffset, STGPi, STGM, STZGM)>;
+
+// FP data processing instructions
+// -----------------------------------------------------------------------------
+
+// FP absolute value
+// FP arithmetic
+// FP min/max
+// FP negate
+// FP select
+def : SchedAlias<WriteF, N2Write_2cyc_1V>;
+
+// FP compare
+def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;
+
+// FP divide, square root
+def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;
+
+// FP divide, H-form
+def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>;
+// FP divide, S-form
+def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
+// FP divide, D-form
+def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;
+
+// FP square root, H-form
+def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>;
+// FP square root, S-form
+def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>;
+// FP square root, D-form
+def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;
+
+// FP multiply
+def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
+
+// FP multiply accumulate
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
+
+// FP round to integral
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
+ "^FRINT(32|64)[XZ][SD]r$")>;
+
+// FP miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// FP convert, from gen to vec reg
+def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
+
+// FP convert, from vec to gen reg
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
+
+// FP convert, Javascript from vec to gen reg
+// FP convert, from vec to vec reg
+def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;
+
+// FP move, immed
+// FP move, register
+def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;
+
+// FP transfer, from gen to low half of vec reg
+def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
+ FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
+
+// FP transfer, from gen to high half of vec reg
+def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
+
+// FP transfer, from vec to gen reg
+def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
+
+// FP load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector reg, literal, S/D/Q forms
+// Load vector reg, unscaled immed
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
+ "^LDUR[BHSDQ]i$")>;
+
+// Load vector reg, immed post-index
+def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>;
+// Load vector reg, immed pre-index
+def : InstRW<[N2Write_6cyc_1I_1L, WriteAdr], (instregex "^LDR[BHSDQ]pre$")>;
+
+// Load vector reg, unsigned immed
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
+
+// Load vector reg, register offset, basic
+// Load vector reg, register offset, scale, S/D-form
+// Load vector reg, register offset, extend
+// Load vector reg, register offset, extend, scale, S/D-form
+def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
+
+// Load vector reg, register offset, scale, H/Q-form
+// Load vector reg, register offset, extend, scale, H/Q-form
+def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
+
+// Load vector pair, immed offset, S/D-form
+def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
+
+// Load vector pair, immed offset, Q-form
+def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
+
+// Load vector pair, immed post-index, S/D-form
+// Load vector pair, immed pre-index, S/D-form
+def : InstRW<[N2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
+ (instregex "^LDP[SD](pre|post)$")>;
+
+// Load vector pair, immed post-index, Q-form
+// Load vector pair, immed pre-index, Q-form
+def : InstRW<[N2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost,
+ LDPQpre)>;
+
+// FP store instructions
+// -----------------------------------------------------------------------------
+
+// Store vector reg, unscaled immed, B/H/S/D-form
+// Store vector reg, unscaled immed, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
+
+// Store vector reg, immed post-index, B/H/S/D-form
+// Store vector reg, immed post-index, Q-form
+// Store vector reg, immed pre-index, B/H/S/D-form
+// Store vector reg, immed pre-index, Q-form
+def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
+ (instregex "^STR[BHSDQ](pre|post)$")>;
+
+// Store vector reg, unsigned immed, B/H/S/D-form
+// Store vector reg, unsigned immed, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
+
+// Store vector reg, register offset, basic, B/H/S/D-form
+// Store vector reg, register offset, basic, Q-form
+// Store vector reg, register offset, scale, S/D-form
+// Store vector reg, register offset, extend, B/H/S/D-form
+// Store vector reg, register offset, extend, Q-form
+// Store vector reg, register offset, extend, scale, S/D-form
+def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
+ (instregex "^STR[BSD]ro[WX]$")>;
+
+// Store vector reg, register offset, scale, H-form
+// Store vector reg, register offset, scale, Q-form
+// Store vector reg, register offset, extend, scale, H-form
+// Store vector reg, register offset, extend, scale, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
+ (instregex "^STR[HQ]ro[WX]$")>;
+
+// Store vector pair, immed offset, S-form
+// Store vector pair, immed offset, D-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
+
+// Store vector pair, immed offset, Q-form
+def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
+
+// Store vector pair, immed post-index, S-form
+// Store vector pair, immed post-index, D-form
+// Store vector pair, immed pre-index, S-form
+// Store vector pair, immed pre-index, D-form
+def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
+ (instregex "^STP[SD](pre|post)$")>;
+
+// Store vector pair, immed post-index, Q-form
+def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
+
+// Store vector pair, immed pre-index, Q-form
+def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
+
+// ASIMD integer instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD absolute diff
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD arith, pair-wise
+// ASIMD compare
+// ASIMD logical
+// ASIMD max/min, basic and pair-wise
+def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
+def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
+
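+// These two aliases are the defaults for ASIMD writes; the per-opcode InstRW
+// entries below take precedence over them, so only instructions without a
+// more specific entry fall back to N2Write_2cyc_1V.
+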
+// ASIMD absolute diff accum
+// ASIMD absolute diff accum long
+def : InstRW<[N2Write_4cyc_1V1],
+ (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[N2Write_4cyc_1V1_1V],
+ (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
+
+// ASIMD arith, reduce, 16B
+def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
+ UADDLVv16i8v)>;
+
+// ASIMD dot product
+// ASIMD dot product using signed and unsigned integers
+def : InstRW<[N2Write_3cyc_1V],
+ (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
+
+// ASIMD matrix multiply-accumulate
+def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
+ "^[SU](MAX|MIN)Vv4i32v$")>;
+
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
+ "^[SU](MAX|MIN)Vv8i16v$")>;
+
+// ASIMD max/min, reduce, 16B
+def : InstRW<[N2Write_4cyc_2V1], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
+
+// ASIMD multiply
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
+
+// ASIMD multiply accumulate
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
+
+// ASIMD multiply accumulate high
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
+
+// ASIMD multiply accumulate saturating long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
+
+// ASIMD multiply/multiply long (8x8) polynomial, D-form
+// ASIMD multiply/multiply long (8x8) polynomial, Q-form
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
+
+// ASIMD multiply long
+def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
+
+// ASIMD pairwise add and accumulate long
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
+
+// ASIMD shift accumulate
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
+ "^SSHLLv", "^SSHRv", "^USHLLv",
+ "^USHRv")>;
+
+// ASIMD shift by immed and insert, basic
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[N2Write_4cyc_1V1],
+ (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
+ "^(SQSHLU?|UQSHL)[bhsd]$",
+ "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+ "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
+ "^UQSHRNv", "^URSHRv")>;
+
+// ASIMD shift by register, basic
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
+
+// ASIMD shift by register, complex
+def : InstRW<[N2Write_4cyc_1V1],
+ (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
+ "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
+
+// ASIMD floating-point instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD FP absolute value/difference
+// ASIMD FP arith, normal
+// ASIMD FP compare
+// ASIMD FP complex add
+// ASIMD FP max/min, normal
+// ASIMD FP max/min, pairwise
+// ASIMD FP negate
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD FP complex multiply add
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
+
+// ASIMD FP convert, long (F16 to F32)
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
+
+// ASIMD FP convert, long (F32 to F64)
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
+
+// ASIMD FP convert, narrow (F32 to F16)
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
+
+// ASIMD FP convert, narrow (F64 to F32)
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
+ "^FCVTXN(v2|v4)f32")>;
+
+// ASIMD FP convert, other, D-form F32 and Q-form F64
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
+ "^[SU]CVTFv2f(32|64)$")>;
+
+// ASIMD FP convert, other, D-form F16 and Q-form F32
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
+ "^[SU]CVTFv4f(16|32)$")>;
+
+// ASIMD FP convert, other, Q-form F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
+ "^[SU]CVTFv8f16$")>;
+
+// ASIMD FP divide, D-form, F16
+def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
+
+// ASIMD FP divide, Q-form, F16
+def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
+
+// ASIMD FP max/min, reduce, F32 and D-form F16
+def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
+
+// ASIMD FP max/min, reduce, Q-form F16
+def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
+
+// ASIMD FP multiply
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
+
+// ASIMD FP multiply accumulate
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
+
+// ASIMD FP multiply accumulate long
+def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
+
+// ASIMD FP round, D-form F32 and Q-form F64
+def : InstRW<[N2Write_3cyc_1V0],
+ (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
+ "^FRINT[32|64)[XZ]v2f(32|64)$")>;
+
+// ASIMD FP round, D-form F16 and Q-form F32
+def : InstRW<[N2Write_4cyc_2V0],
+ (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
+ "^FRINT(32|64)[XZ]v4f32$")>;
+
+// ASIMD FP round, Q-form F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+
+// ASIMD FP square root, D-form, F16
+def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;
+
+// ASIMD FP square root, D-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;
+
+// ASIMD FP square root, Q-form, F16
+def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;
+
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;
+
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;
+
+// ASIMD BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD convert, F32 to BF16
+def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;
+
+// ASIMD dot product
+def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
+
+// ASIMD matrix multiply accumulate
+def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
+ BFMLALTIdx)>;
+
+// Scalar convert, F32 to BF16
+def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;
+
+// ASIMD miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD bit reverse
+// ASIMD bitwise insert
+// ASIMD count
+// ASIMD duplicate, element
+// ASIMD extract
+// ASIMD extract narrow
+// ASIMD insert, element to element
+// ASIMD move, FP immed
+// ASIMD move, integer immed
+// ASIMD reverse
+// ASIMD table lookup, 1 or 2 table regs
+// ASIMD table lookup extension, 1 table reg
+// ASIMD transfer, element to gen reg
+// ASIMD transpose
+// ASIMD unzip/zip
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD duplicate, gen reg
+def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
+
+// ASIMD reciprocal and square root estimate, D-form U32
+def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form U32
+def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
+def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
+ FRECPEv1i64, FRECPEv2f32,
+ FRSQRTEv1f16, FRSQRTEv1i32,
+ FRSQRTEv1i64, FRSQRTEv2f32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
+def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
+ FRSQRTEv4f16, FRSQRTEv4f32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form F16
+def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
+
+// ASIMD reciprocal exponent
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;
+
+// ASIMD reciprocal step
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
+
+// ASIMD table lookup, 3 table regs
+def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
+
+// ASIMD table lookup, 4 table regs
+def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;
+
+// ASIMD table lookup extension, 2 table reg
+def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
+
+// ASIMD table lookup extension, 3 table reg
+def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
+
+// ASIMD table lookup extension, 4 table reg
+def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
+
+// ASIMD load instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_6cyc_1L, WriteAdr],
+ (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_1L, WriteAdr],
+ (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_6cyc_2L, WriteAdr],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_2L, WriteAdr],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_6cyc_3L, WriteAdr],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_3L, WriteAdr],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_7cyc_4L, WriteAdr],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_7cyc_4L, WriteAdr],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 2 element, multiple, Q-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+def : InstRW<[N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H/S
+def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)$")>;
+def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
+
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[N2Write_4cyc_3L01_3V], (instregex "ST4i(64)$")>;
+def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+// Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
+
+// Crypto polynomial (64x64) multiply long
+def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;
+
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
+
+// Crypto SHA256 schedule acceleration ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
+
+// Crypto SHA512 hash acceleration ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
+
+// Crypto SHA3 ops
+def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
+
+// Crypto SM3 ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
+ "^SM3TT[12][AB]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
+
+// CRC
+// -----------------------------------------------------------------------------
+
+def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;
+
+// SVE Predicate instructions
+// -----------------------------------------------------------------------------
+
+// Loop control, based on predicate
+def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
+ BRKB_PPmP, BRKB_PPzP)>;
+
+// Loop control, based on predicate and flag setting
+def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
+
+// Loop control, propagating
+def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
+
+// Loop control, propagating and flag setting
+def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
+ BRKPBS_PPzPP)>;
+
+// Loop control, based on GPR
+def : InstRW<[N2Write_3cyc_1M],
+ (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
+
+def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
+
+// Loop terminate
+def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
+
+// Predicate counting scalar
+def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
+def : InstRW<[N2Write_2cyc_1M],
+ (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
+ "^SQ(DEC|INC)[BHWD]_XPiWdI$",
+ "^(UQDEC|UQINC)[BHWD]_WPiI$")>;
+
+// Predicate counting scalar, active predicate
+def : InstRW<[N2Write_2cyc_1M],
+ (instregex "^CNTP_XPP_[BHSD]$",
+ "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
+ "^(UQDEC|UQINC)P_WP_[BHSD]$",
+ "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
+
+// Predicate counting vector, active predicate
+def : InstRW<[N2Write_7cyc_1M_1M0_1V],
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
+
+// Predicate logical
+def : InstRW<[N2Write_1cyc_1M0],
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
+
+// Predicate logical, flag setting
+def : InstRW<[N2Write_2cyc_1M0_1M],
+ (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
+
+// Predicate reverse
+def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
+
+// Predicate select
+def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;
+
+// Predicate set
+def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
+
+// Predicate set/initialize, set flags
+def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;
+
+// Predicate find first/next
+def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
+
+// Predicate test
+def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;
+
+// Predicate transpose
+def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
+
+// Predicate unpack and widen
+def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
+
+// Predicate zip/unzip
+def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
+
+// SVE integer instructions
+// -----------------------------------------------------------------------------
+
+// Arithmetic, absolute diff
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, absolute diff accum
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
+
+// Arithmetic, absolute diff accum long
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
+
+// Arithmetic, absolute diff long
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
+
+// Arithmetic, basic
+def : InstRW<[N2Write_2cyc_1V],
+ (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
+ "^(ADD|SUB)_ZZZ_[BHSD]$",
+ "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]$",
+ "^ADR_LSL_ZZZ_[SD]_[0123]$",
+ "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
+ "^SADDLBT_ZZZ_[HSD]$",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
+ "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
+
+// Arithmetic, complex
+def : InstRW<[N2Write_2cyc_1V],
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
+ "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
+ "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
+ "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
+ "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
+ "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, large integer
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
+
+// Arithmetic, pairwise add
+def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, pairwise add and accum long
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
+
+// Arithmetic, shift
+def : InstRW<[N2Write_2cyc_1V1],
+ (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
+ "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
+ "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
+ "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, shift and accumulate
+def : InstRW<[N2Write_4cyc_1V1],
+ (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;
+
+// Arithmetic, shift by immediate
+// Arithmetic, shift by immediate and insert
+def : InstRW<[N2Write_2cyc_1V1],
+ (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
+
+// Arithmetic, shift complex
+def : InstRW<[N2Write_4cyc_1V1],
+ (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
+ "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
+ "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
+ "^SQSHRU?N[BT]_ZZI_[BHS]$",
+ "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
+
+// Arithmetic, shift right for divide
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>;
+
+// Arithmetic, shift rounding
+def : InstRW<[N2Write_4cyc_1V1],
+ (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
+ "^[SU]RSHR_ZPmI_[BHSD]$")>;
+
+// Bit manipulation
+def : InstRW<[N2Write_6cyc_2V1],
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;
+
+// Bitwise select
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
+
+// Count/reverse bits
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
+
+// Broadcast logical bitmask immediate to vector
+def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;
+
+// Compare and set flags
+def : InstRW<[N2Write_4cyc_1V0_1M],
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
+
+// Complex add
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
+
+// Complex dot product 8-bit element
+def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
+
+// Complex dot product 16-bit element
+def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
+
+// Complex multiply-add B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$",
+ "^CMLA_ZZZI_[HS]$")>;
+
+// Complex multiply-add D element size
+def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;
+
+// Conditional extract operations, scalar form
+def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+
+// Conditional extract operations, SIMD&FP scalar and vector forms
+def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
+ "^COMPACT_ZPZ_[SD]$",
+ "^SPLICE_ZPZZ?_[BHSD]$")>;
+
+// Convert to floating point, 64b to float or convert to double
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;
+
+// Convert to floating point, 64b to half
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;
+
+// Convert to floating point, 32b to single or half
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
+
+// Convert to floating point, 32b to double
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>;
+
+// Convert to floating point, 16b to half
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
+
+// Copy, scalar
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
+
+// Copy, scalar SIMD&FP or imm
+def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
+ "^CPY_ZPzI_[BHSD]$")>;
+
+// Divides, 32 bit
+def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
+
+// Divides, 64 bit
+def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
+
+// Dot product, 8 bit
+def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
+
+// Dot product, 8 bit, using signed and unsigned integers
+def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
+
+// Dot product, 16 bit
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
+
+// Duplicate, immediate and indexed form
+def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
+ "^DUP_ZZI_[BHSDQ]$")>;
+
+// Duplicate, scalar form
+def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
+
+// Extend, sign or zero
+def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
+ "^[SU]XTH_ZPmZ_[SD]$",
+ "^[SU]XTW_ZPmZ_[D]$")>;
+
+// Extract
+def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
+
+// Extract narrow saturating
+def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
+ "^SQXTUN[BT]_ZZ_[BHS]$")>;
+
+// Extract/insert operation, SIMD and FP scalar form
+def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
+ "^INSR_ZV_[BHSD]$")>;
+
+// Extract/insert operation, scalar
+def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
+ "^INSR_ZR_[BHSD]$")>;
+
+// Histogram operations
+def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
+ "^HISTSEG_ZZZ$")>;
+
+// Horizontal operations, B, H, S form, immediate operands only
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;
+
+// Horizontal operations, B, H, S form, scalar, immediate operands / scalar
+// operands only / immediate, scalar operands
+def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+
+// Horizontal operations, D form, immediate operands only
+def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;
+
+// Horizontal operations, D form, scalar, immediate operands / scalar operands
+// only / immediate, scalar operands
+def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+
+// Logical
+def : InstRW<[N2Write_2cyc_1V],
+ (instregex "^(AND|EOR|ORR)_ZI$",
+ "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
+ "^EOR(BT|TB)_ZZZ_[BHSD]$",
+ "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
+
+// Max/min, basic and pairwise
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
+ "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;
+
+// Matching operations
+def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
+
+// Matrix multiply-accumulate
+def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
+
+// Move prefix
+def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
+ "^MOVPRFX_ZZ$")>;
+
+// Multiply, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
+
+// Multiply, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
+ "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;
+
+// Multiply long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
+ "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
+
+// Multiply accumulate, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
+ "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
+
+// Multiply accumulate, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
+ "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
+
+// Multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
+ "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
+
+// Multiply accumulate saturating doubling long regular
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
+ "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
+
+// Multiply saturating doubling high, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
+ "^SQDMULH_ZZZI_[HS]$")>;
+
+// Multiply saturating doubling high, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
+
+// Multiply saturating doubling long
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
+ "^SQDMULL[BT]_ZZZI_[SD]$")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
+// element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
+ "^SQRDCMLAH_ZZZ_[BHS]$",
+ "^SQRDML[AS]H_ZZZI_[HS]$",
+ "^SQRDCMLAH_ZZZI_[HS]$")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, D element
+// size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
+ "^SQRDCMLAH_ZZZ_D$")>;
+
+// Multiply saturating rounding doubling regular/complex, B, H, S element size
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
+ "^SQRDMULH_ZZZI_[HS]$")>;
+
+// Multiply saturating rounding doubling regular/complex, D element size
+def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;
+
+// Multiply/multiply long, (8x8) polynomial
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
+ "^PMULL[BT]_ZZZ_[HDQ]$")>;
+
+// Predicate counting vector
+def : InstRW<[N2Write_2cyc_1V0],
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
+
+// Reciprocal estimate
+def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
+
+// Reduction, arithmetic, B form
+def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
+
+// Reduction, arithmetic, H form
+def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
+
+// Reduction, arithmetic, S form
+def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
+
+// Reduction, arithmetic, D form
+def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
+
+// Reduction, logical
+def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
+
+// Reverse, vector
+def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
+ "^REVB_ZPmZ_[HSD]$",
+ "^REVH_ZPmZ_[SD]$",
+ "^REVW_ZPmZ_D$")>;
+
+// Select, vector form
+def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
+
+// Table lookup
+def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
+
+// Table lookup extension
+def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
+
+// Transpose, vector form
+def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
+
+// Unpack and extend
+def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
+
+// Zip/unzip
+def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+
+// SVE floating-point instructions
+// -----------------------------------------------------------------------------
+
+// Floating point absolute value/difference
+def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;
+
+// Floating point arithmetic
+def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
+ "^FADDP_ZPmZZ_[HSD]$",
+ "^FNEG_ZPmZ_[HSD]$",
+ "^FSUBR_ZPm[IZ]_[HSD]$")>;
+
+// Floating point associative add, F16
+def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
+
+// Floating point associative add, F32
+def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;
+
+// Floating point associative add, F64
+def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
+
+// Floating point compare
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
+ "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
+ "^FCM(LE|LT)_PPzZ0_[HSD]$",
+ "^FCMUO_PPzZZ_[HSD]$")>;
+
+// Floating point complex add
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+
+// Floating point complex multiply add
+def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
+ "^FCMLA_ZZZI_[HS]$")>;
+
+// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
+ "^FCVTLT_ZPmZ_HtoS$",
+ "^FCVTNT_ZPmZ_StoH$")>;
+
+// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
+// or F64 to F16)
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
+ "^FCVTLT_ZPmZ_StoD$",
+ "^FCVTNT_ZPmZ_DtoS$")>;
+
+// Floating point convert, round to odd
+def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
+
+// Floating point base2 log, F16
+def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>;
+
+// Floating point base2 log, F32
+def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>;
+
+// Floating point base2 log, F64
+def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>;
+
+// Floating point convert to integer, F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
+
+// Floating point convert to integer, F32
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
+
+// Floating point convert to integer, F64
+def : InstRW<[N2Write_3cyc_1V0],
+ (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
+
+// Floating point copy
+def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
+ "^FDUP_ZI_[HSD]$")>;
+
+// Floating point divide, F16
+def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
+
+// Floating point divide, F32
+def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
+
+// Floating point divide, F64
+def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
+
+// Floating point min/max pairwise
+def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
+
+// Floating point min/max
+def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
+
+// Floating point multiply
+def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
+ "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
+
+// Floating point multiply accumulate
+def : InstRW<[N2Write_4cyc_1V],
+ (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
+ "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;
+
+// Floating point multiply add/sub accumulate long
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
+
+// Floating point reciprocal estimate, F16
+def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
+ FRSQRTE_ZZ_H)>;
+
+// Floating point reciprocal estimate, F32
+def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
+ FRSQRTE_ZZ_S)>;
+
+// Floating point reciprocal estimate, F64
+def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
+ FRSQRTE_ZZ_D)>;
+
+// Floating point reciprocal step
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+
+// Floating point reduction, F16
+def : InstRW<[N2Write_6cyc_2V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
+
+// Floating point reduction, F32
+def : InstRW<[N2Write_4cyc_1V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
+
+// Floating point reduction, F64
+def : InstRW<[N2Write_2cyc_1V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
+
+// Floating point round to integral, F16
+def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
+
+// Floating point round to integral, F32
+def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
+
+// Floating point round to integral, F64
+def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
+
+// Floating point square root, F16
+def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;
+
+// Floating point square root, F32
+def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;
+
+// Floating point square root, F64
+def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;
+
+// Floating point trigonometric exponentiation
+def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
+
+// Floating point trigonometric multiply add
+def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
+
+// Floating point trigonometric, miscellaneous
+def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
+
+// SVE BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// Convert, F32 to BF16
+def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+
+// Dot product
+def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
+
+// Matrix multiply accumulate
+def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;
+
+// Multiply accumulate long
+def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZ[ZI]$")>;
+
+// SVE Load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector
+def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
+
+// Load predicate
+def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
+
+// Contiguous load, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$",
+ "^LD1S?B_[HSD]_IMM_REAL$",
+ "^LD1S?H_[SD]_IMM_REAL$",
+ "^LD1S?W_D_IMM_REAL$" )>;
+// Contiguous load, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
+ "^LD1S?B_[HSD]$",
+ "^LD1S?H_[SD]$",
+ "^LD1S?W_D$" )>;
+
+// Contiguous load broadcast, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
+ "^LD1RSW_IMM$",
+ "^LD1RS?B_[HSD]_IMM$",
+ "^LD1RS?H_[SD]_IMM$",
+ "^LD1RS?W_D_IMM$",
+ "^LD1RQ_[BHWD]_IMM$")>;
+
+// Contiguous load broadcast, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
+
+// Non temporal load, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
+
+// Non temporal load, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
+
+// Non temporal gather load, vector + scalar 32-bit element size
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
+ "^LDNT1S[BH]_ZZR_S_REAL$")>;
+
+// Non temporal gather load, vector + scalar 64-bit element size
+def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
+def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+
+// Contiguous first faulting load, scalar + scalar
+def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
+ "^LDFF1S?B_[HSD]_REAL$",
+ "^LDFF1S?H_[SD]_REAL$",
+ "^LDFF1S?W_D_REAL$")>;
+
+// Contiguous non faulting load, scalar + imm
+def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
+ "^LDNF1S?B_[HSD]_IMM_REAL$",
+ "^LDNF1S?H_[SD]_IMM_REAL$",
+ "^LDNF1S?W_D_IMM_REAL$")>;
+
+// Contiguous Load two structures to two vectors, scalar + imm
+def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + scalar
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;
+
+// Contiguous Load three structures to three vectors, scalar + imm
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;
+
+// Contiguous Load three structures to three vectors, scalar + scalar
+def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;
+
+// Contiguous Load four structures to four vectors, scalar + imm
+def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
+
+// Contiguous Load four structures to four vectors, scalar + scalar
+def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
+
+// Gather load, vector + imm, 32-bit element size
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
+ "^GLD(FF)?1W_IMM_REAL$")>;
+
+// Gather load, vector + imm, 64-bit element size
+def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
+ "^GLD(FF)?1D_IMM_REAL$")>;
+
+// Gather load, 64-bit element size
+def : InstRW<[N2Write_9cyc_2L_2V],
+ (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
+ "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
+ "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
+ "^GLD(FF)?1D_(SCALED_)?REAL$")>;
+
+// Gather load, 32-bit scaled offset
+def : InstRW<[N2Write_10cyc_2L_2V],
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+
+// Gather load, 32-bit unpacked unscaled offset
+def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+
+// SVE Store instructions
+// -----------------------------------------------------------------------------
+
+// Store from predicate reg
+def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;
+
+// Store from vector reg
+def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;
+
+// Contiguous store, scalar + imm
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
+ "^ST1B_[HSD]_IMM$",
+ "^ST1H_[SD]_IMM$",
+ "^ST1W_D_IMM$")>;
+
+// Contiguous store, scalar + scalar
+def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
+ "^ST1B_[HSD]$",
+ "^ST1W_D$")>;
+
+// Contiguous store two structures from two vectors, scalar + imm
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;
+
+// Contiguous store three structures from three vectors, scalar + imm
+def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;
+
+// Contiguous store four structures from four vectors, scalar + imm
+def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;
+
+// Non temporal store, scalar + imm
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
+
+// Non temporal store, scalar + scalar
+def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
+
+// Scatter non temporal store, vector + scalar 32-bit element size
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;
+
+// Scatter non temporal store, vector + scalar 64-bit element size
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;
+
+// Scatter store vector + imm 32-bit element size
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
+ "^SST1W_IMM$")>;
+
+// Scatter store vector + imm 64-bit element size
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
+ "^SST1D_IMM$")>;
+
+// Scatter store, 32-bit scaled offset
+def : InstRW<[N2Write_4cyc_2L01_2V],
+ (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unpacked unscaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
+ "^SST1D_[SU]XTW$")>;
+
+// Scatter store, 32-bit unpacked scaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
+ "^SST1D_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unscaled offset
+def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
+ "^SST1W_[SU]XTW$")>;
+
+// Scatter store, 64-bit scaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
+ "^SST1D_SCALED$")>;
+
+// Scatter store, 64-bit unscaled offset
+def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
+ "^SST1D$")>;
+
+// SVE Miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// Read first fault register, unpredicated
+def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+
+// Read first fault register, predicated
+def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+
+// Read first fault register and set flags
+def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
+
+// Set first fault register
+// Write to first fault register
+def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
+
+// Prefetch
+def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
+
+// SVE Cryptographic instructions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
+ "^AESI?MC_ZZ_B$")>;
+
+// Crypto SHA3 ops
+def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
+ "^RAX1_ZZZ_D$",
+ "^XAR_ZZZI_[BHSD]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
+
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index 6ecfc97a4273..9c1bf3231a55 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -26,7 +26,8 @@ def TSV110Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
}
// Define each kind of processor resource and number available on the TSV110,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
index ff34c0ce9a0c..8b380ae0e8f3 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -27,7 +27,8 @@ def ThunderXT8XModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index ffa0a5e7d91a..cdafa33da054 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -27,7 +27,8 @@ def ThunderX2T99Model : SchedMachineModel {
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
PAUnsupported.F,
- SMEUnsupported.F);
+ SMEUnsupported.F,
+ [HasMTE]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
index 46a1c217f984..5b1e9b5bcf23 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
@@ -25,7 +25,8 @@ def ThunderX3T110Model : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
- PAUnsupported.F);
+ PAUnsupported.F,
+ [HasMTE]);
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 41c7a8c5042f..274a025e82a0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -796,6 +796,50 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
return IC.replaceInstUsesWith(II, Extract);
}
+static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // The SIMD&FP variant of CLAST[AB] is significantly faster than the scalar
+ // integer variant across a variety of micro-architectures. Replace scalar
+ // integer CLAST[AB] intrinsic with optimal SIMD&FP variant. A simple
+ // bitcast-to-fp + clast[ab] + bitcast-to-int will cost a cycle or two more
+ // depending on the micro-architecture, but has been observed as generally
+ // being faster, particularly when the CLAST[AB] op is a loop-carried
+ // dependency.
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ Value *Pg = II.getArgOperand(0);
+ Value *Fallback = II.getArgOperand(1);
+ Value *Vec = II.getArgOperand(2);
+ Type *Ty = II.getType();
+
+ if (!Ty->isIntegerTy())
+ return None;
+
+ Type *FPTy;
+ switch (cast<IntegerType>(Ty)->getBitWidth()) {
+ default:
+ return None;
+ case 16:
+ FPTy = Builder.getHalfTy();
+ break;
+ case 32:
+ FPTy = Builder.getFloatTy();
+ break;
+ case 64:
+ FPTy = Builder.getDoubleTy();
+ break;
+ }
+
+ Value *FPFallBack = Builder.CreateBitCast(Fallback, FPTy);
+ auto *FPVTy = VectorType::get(
+ FPTy, cast<VectorType>(Vec->getType())->getElementCount());
+ Value *FPVec = Builder.CreateBitCast(Vec, FPVTy);
+ auto *FPII = Builder.CreateIntrinsic(II.getIntrinsicID(), {FPVec->getType()},
+ {Pg, FPFallBack, FPVec});
+ Value *FPIItoInt = Builder.CreateBitCast(FPII, II.getType());
+ return IC.replaceInstUsesWith(II, FPIItoInt);
+}
+
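The combine above hinges on each legal integer element width having a same-sized scalar FP type to bitcast through. A minimal standalone sketch of that mapping, assuming only llvm/IR/IRBuilder.h (equivalentFPType is an illustrative name, not a helper from this patch):

// Width-to-FP-type mapping the CLAST[AB] combine relies on; returns
// nullptr for widths with no bitcast-compatible scalar FP type (e.g. i8),
// mirroring the combine's bail-out to None in its default case.
static llvm::Type *equivalentFPType(llvm::IRBuilderBase &B, unsigned Bits) {
  switch (Bits) {
  case 16: return B.getHalfTy();   // i16 <-> half
  case 32: return B.getFloatTy();  // i32 <-> float
  case 64: return B.getDoubleTy(); // i64 <-> double
  default: return nullptr;
  }
}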
static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
@@ -1294,6 +1338,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_lasta:
case Intrinsic::aarch64_sve_lastb:
return instCombineSVELast(IC, II);
+ case Intrinsic::aarch64_sve_clasta_n:
+ case Intrinsic::aarch64_sve_clastb_n:
+ return instCombineSVECondLast(IC, II);
case Intrinsic::aarch64_sve_cntd:
return instCombineSVECntElts(IC, II, 2);
case Intrinsic::aarch64_sve_cntw:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index d0aacb457a39..59ec91843266 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -334,8 +334,10 @@ public:
return 2;
}
- bool emitGetActiveLaneMask() const {
- return ST->hasSVE();
+ PredicationStyle emitGetActiveLaneMask() const {
+ if (ST->hasSVE())
+ return PredicationStyle::DataAndControlFlow;
+ return PredicationStyle::None;
}
bool supportsScalableVectors() const { return ST->hasSVE(); }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 89e1d85a6085..aaef363e9b8d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -354,7 +355,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
"Return value without a vreg");
bool Success = true;
- if (!VRegs.empty()) {
+ if (!FLI.CanLowerReturn) {
+ insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
+ } else if (!VRegs.empty()) {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -464,6 +467,18 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return Success;
}
+bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
+ CallingConv::ID CallConv,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ bool IsVarArg) const {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ const auto &TLI = *getTLI<AArch64TargetLowering>();
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
+ MF.getFunction().getContext());
+
+ return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
+}
+
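The effect of a canLowerReturn failure is easiest to see at the source level; a hedged illustration (the struct size is arbitrary, chosen to overflow the return registers):

// When checkReturn fails, GlobalISel demotes the return value to a
// hidden sret pointer instead of falling back to SelectionDAG:
struct Big { long v[8]; };   // 64 bytes: too large for return registers
Big produce();               // source-level signature
// is lowered roughly as if declared:
//   void produce(Big *result);  // caller allocates the slot and reloads it
//                               // (insertSRetLoads); the callee stores into
//                               // it (insertSRetStores)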
/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
@@ -533,6 +548,12 @@ bool AArch64CallLowering::lowerFormalArguments(
SmallVector<ArgInfo, 8> SplitArgs;
SmallVector<std::pair<Register, Register>> BoolArgs;
+
+ // Insert the hidden sret parameter if the return value won't fit in the
+ // return registers.
+ if (!FLI.CanLowerReturn)
+ insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
+
unsigned i = 0;
for (auto &Arg : F.args()) {
if (DL.getTypeStoreSize(Arg.getType()).isZero())
@@ -1194,7 +1215,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
- if (!Info.OrigRet.Ty->isVoidTy()) {
+ if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
CallReturnHandler Handler(MIRBuilder, MRI, MIB);
bool UsingReturnedArg =
@@ -1226,6 +1247,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
.addImm(Assigner.StackOffset)
.addImm(CalleePopBytes);
+ if (!Info.CanLowerReturn) {
+ insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
+ Info.DemoteRegister, Info.DemoteStackIndex);
+ }
return true;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
index aafb1d19640a..cbdf77f69a63 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -35,6 +35,10 @@ public:
ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
Register SwiftErrorVReg) const override;
+ bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ bool IsVarArg) const override;
+
bool fallBackToDAGISel(const MachineFunction &MF) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 9a65687735fe..eb8d0552173d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1710,11 +1710,6 @@ bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
Register CondReg = I.getOperand(0).getReg();
MachineInstr *CCMI = MRI.getVRegDef(CondReg);
- if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
- CondReg = CCMI->getOperand(1).getReg();
- CCMI = MRI.getVRegDef(CondReg);
- }
-
// Try to select the G_BRCOND using whatever is feeding the condition if
// possible.
unsigned CCMIOpc = CCMI->getOpcode();
@@ -3346,12 +3341,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_SELECT: {
auto &Sel = cast<GSelect>(I);
- if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
- LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
- << ", expected: " << LLT::scalar(1) << '\n');
- return false;
- }
-
const Register CondReg = Sel.getCondReg();
const Register TReg = Sel.getTrueReg();
const Register FReg = Sel.getFalseReg();
@@ -4777,12 +4766,6 @@ static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
return false;
MachineInstr *ValDef = MRI.getVRegDef(Val);
unsigned Opcode = ValDef->getOpcode();
- if (Opcode == TargetOpcode::G_TRUNC) {
- // Look through a trunc.
- Val = ValDef->getOperand(1).getReg();
- ValDef = MRI.getVRegDef(Val);
- Opcode = ValDef->getOpcode();
- }
if (isa<GAnyCmp>(ValDef)) {
CanNegate = true;
MustBeFirst = false;
@@ -4870,12 +4853,6 @@ MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
auto &MRI = *MIB.getMRI();
MachineInstr *ValDef = MRI.getVRegDef(Val);
unsigned Opcode = ValDef->getOpcode();
- if (Opcode == TargetOpcode::G_TRUNC) {
- // Look through a trunc.
- Val = ValDef->getOperand(1).getReg();
- ValDef = MRI.getVRegDef(Val);
- Opcode = ValDef->getOpcode();
- }
if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
Register LHS = Cmp->getLHSReg();
Register RHS = Cmp->getRHSReg();
@@ -5026,31 +5003,17 @@ bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
// First, check if the condition is defined by a compare.
MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
- while (CondDef) {
- // We can only fold if all of the defs have one use.
- Register CondDefReg = CondDef->getOperand(0).getReg();
- if (!MRI.hasOneNonDBGUse(CondDefReg)) {
- // Unless it's another select.
- for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
- if (CondDef == &UI)
- continue;
- if (UI.getOpcode() != TargetOpcode::G_SELECT)
- return false;
- }
- }
-
- // We can skip over G_TRUNC since the condition is 1-bit.
- // Truncating/extending can have no impact on the value.
- unsigned Opc = CondDef->getOpcode();
- if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
- break;
-
- // Can't see past copies from physregs.
- if (Opc == TargetOpcode::COPY &&
- Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
- return false;
- CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
+ // We can only fold if all of the defs have one use.
+ Register CondDefReg = CondDef->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(CondDefReg)) {
+ // Unless it's another select.
+ for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
+ if (CondDef == &UI)
+ continue;
+ if (UI.getOpcode() != TargetOpcode::G_SELECT)
+ return false;
+ }
}
// Is the condition defined by a compare?
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 74ec9373ce9e..d3617b87a851 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -42,7 +42,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
: ST(&ST) {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 64);
- const LLT s1 = LLT::scalar(1);
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
@@ -80,7 +79,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT &MinFPScalar = HasFP16 ? s16 : s32;
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
- .legalFor({p0, s1, s8, s16, s32, s64})
+ .legalFor({p0, s8, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
.widenScalarToNextPow2(0)
.clampScalar(0, s8, s64)
@@ -198,8 +197,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(
{G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
- .legalFor({{s32, s1}, {s64, s1}})
+ .legalFor({{s32, s32}, {s64, s32}})
.clampScalar(0, s32, s64)
+ .clampScalar(1, s32, s64)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
@@ -241,7 +241,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_INSERT)
.legalIf(all(typeInSet(0, {s32, s64, p0}),
- typeInSet(1, {s1, s8, s16, s32}), smallerThan(1, 0)))
+ typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(1)
@@ -260,8 +260,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
.maxScalarIf(typeInSet(1, {s128}), 0, s64);
- getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
- .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
+
+ for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
+ auto &Actions = getActionDefinitionsBuilder(Op);
+
+ if (Op == G_SEXTLOAD)
+ Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
+
+ // Atomics have zero extending behavior.
+ Actions
.legalForTypesWithMemDesc({{s32, p0, s8, 8},
{s32, p0, s16, 8},
{s32, p0, s32, 8},
@@ -278,6 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.unsupportedIfMemSizeNotPow2()
// Lower anything left over into G_*EXT and G_LOAD
.lower();
+ }
auto IsPtrVecPred = [=](const LegalityQuery &Query) {
const LLT &ValTy = Query.Types[0];
@@ -425,10 +433,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT &SrcTy = Query.Types[1];
- // Special case for s1.
- if (SrcTy == s1)
- return true;
-
// Make sure we fit in a register otherwise. Don't bother checking that
// the source type is below 128 bits. We shouldn't be allowing anything
// through which is wider than the destination in the first place.
@@ -481,13 +485,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0);
// Control-flow
- getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
+ getActionDefinitionsBuilder(G_BRCOND)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
+ .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
+ .clampScalar(1, s32, s32)
.minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
.lowerIf(isVector(0));
@@ -500,7 +507,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
getActionDefinitionsBuilder(G_PTRTOINT)
- .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
+ .legalForCartesianProduct({s8, s16, s32, s64}, {p0})
.legalFor({{v2s64, v2p0}})
.maxScalar(0, s64)
.widenScalarToNextPow2(0, /*Min*/ 8);
@@ -517,7 +524,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// FIXME: This is wrong since G_BITCAST is not allowed to change the
// number of bits but it's what the previous code described and fixing
// it breaks tests.
- .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
+ .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
v2p0});
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 2901e5c0fe4d..bd0a497fa441 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -43,11 +43,9 @@ namespace {
class AArch64MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
- const MCInstrInfo &MCII;
public:
- AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : Ctx(ctx), MCII(mcii) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &, MCContext &ctx) : Ctx(ctx) {}
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) = delete;
void operator=(const AArch64MCCodeEmitter &) = delete;
~AArch64MCCodeEmitter() override = default;
@@ -193,12 +191,6 @@ public:
uint32_t encodeMatrixIndexGPR32(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
@@ -618,9 +610,6 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
if (MI.getOpcode() == AArch64::TLSDESCCALL) {
// This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
// following (BLR) instruction. It doesn't emit any code itself so it
@@ -674,7 +663,6 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
return EncodedValue;
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AArch64GenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 34e3b2cf58e4..f129bfe11e4d 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -34,6 +34,7 @@ using namespace llvm;
#define GET_INSTRINFO_MC_DESC
#define GET_INSTRINFO_MC_HELPERS
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AArch64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 049c49796dc6..7d1de3e53c0c 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -33,6 +33,7 @@ class MCSubtargetInfo;
class MCTargetOptions;
class MCTargetStreamer;
class Target;
+class FeatureBitset;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 2744e81f99f1..cb36aa26e839 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -227,6 +227,40 @@ class sme_add_vector_to_tile_u64<bit V, string mnemonic>
let Inst{2-0} = ZAda;
}
+class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
+ : Pseudo<(outs),
+ (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
+ Sched<[]> {
+ // Translated to the actual instructions in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+}
+
+def ADDHA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+
+def : Pat<(int_aarch64_sme_addha
+ imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ (nxv4i32 ZPR32:$zn)),
+ (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+ imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ (nxv4i32 ZPR32:$zn)),
+ (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+
+def : Pat<(int_aarch64_sme_addha
+ imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ (nxv2i64 ZPR64:$zn)),
+ (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+ imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ (nxv2i64 ZPR64:$zn)),
+ (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+}
+
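Because usesCustomInserter is set, these pseudos survive instruction selection and are expanded in the target's custom-inserter hook. A minimal sketch of that shape, with expandAddVectorToTile as a hypothetical helper name (the real expansion in AArch64ISelLowering.cpp may differ):

// Sketch only: pseudos with usesCustomInserter = 1 are routed here, where
// the backend can choose the real ADDHA/ADDVA encoding and ZA tile operand.
MachineBasicBlock *
AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                   MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  case AArch64::ADDHA_MPPZ_PSEUDO_S:
    return expandAddVectorToTile(AArch64::ADDHA_MPPZ_S, MI, BB); // hypothetical
  // ... ADDVA_MPPZ_PSEUDO_S and the HasSMEI64 _D variants ...
  default:
    return TargetLowering::EmitInstrWithCustomInserter(MI, BB);
  }
}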
//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 3631536a32b9..7cdd4c4af95e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -650,11 +650,11 @@ multiclass sve_int_pfalse<bits<6> opc, string asm> {
def : Pat<(nxv1i1 immAllZerosV), (!cast<Instruction>(NAME))>;
}
-class sve_int_ptest<bits<6> opc, string asm>
+class sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op>
: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
asm, "\t$Pg, $Pn",
"",
- []>, Sched<[]> {
+ [(op (nxv16i1 PPRAny:$Pg), (nxv16i1 PPR8:$Pn))]>, Sched<[]> {
bits<4> Pg;
bits<4> Pn;
let Inst{31-24} = 0b00100101;
@@ -1691,6 +1691,9 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
!cast<Instruction>(NAME), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1,
!cast<Instruction>(NAME), PTRUE_D>;
+ // Emulate .Q operation using a PTRUE_D when the other lanes don't matter.
+ def : SVE_2_Op_AllActive_Pat<nxv1i1, op_nopred, nxv1i1, nxv1i1,
+ !cast<Instruction>(NAME), PTRUE_D>;
}
// An instance of sve_int_pred_log_and but uses op_nopred's first operand as the
@@ -1706,6 +1709,9 @@ multiclass sve_int_pred_log_v2<bits<4> opc, string asm, SDPatternOperator op,
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
def : Pat<(nxv2i1 (op_nopred nxv2i1:$Op1, nxv2i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+ // Emulate .Q operation using a PTRUE_D when the other lanes don't matter.
+ def : Pat<(nxv1i1 (op_nopred nxv1i1:$Op1, nxv1i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 71303611265c..cf8891cff1b3 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -343,7 +343,8 @@ struct SysAlias {
: Name(N), Encoding(E), FeaturesRequired(F) {}
bool haveFeatures(FeatureBitset ActiveFeatures) const {
- return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ return ActiveFeatures[llvm::AArch64::FeatureAll] ||
+ (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
}
FeatureBitset getRequiredFeatures() const { return FeaturesRequired; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index c4680cbedadf..91dc611fb265 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -317,6 +317,9 @@ extern char &SIFormMemoryClausesID;
void initializeSIPostRABundlerPass(PassRegistry&);
extern char &SIPostRABundlerID;
+void initializeGCNCreateVOPDPass(PassRegistry &);
+extern char &GCNCreateVOPDID;
+
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
extern char &AMDGPUUnifyDivergentExitNodesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 94d7844e8a32..a8108b1d637b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -626,13 +626,13 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
Constant *FoldedT = SelOpNo ?
ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) :
ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL);
- if (isa<ConstantExpr>(FoldedT))
+ if (!FoldedT || isa<ConstantExpr>(FoldedT))
return false;
Constant *FoldedF = SelOpNo ?
ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) :
ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL);
- if (isa<ConstantExpr>(FoldedF))
+ if (!FoldedF || isa<ConstantExpr>(FoldedF))
return false;
IRBuilder<> Builder(&BO);
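The added null checks matter because ConstantFoldBinaryOpOperands may give up and return nullptr rather than always producing a Constant; a hedged sketch of the defensive idiom (standalone fragment, not from the patch):

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"

// Reject both failure modes before using a folded operand: a null result
// (folding failed outright) and a ConstantExpr (not a usable constant here).
static bool foldsToPlainConstant(unsigned Opc, llvm::Constant *L,
                                 llvm::Constant *R,
                                 const llvm::DataLayout &DL) {
  llvm::Constant *Folded = llvm::ConstantFoldBinaryOpOperands(Opc, L, R, DL);
  return Folded && !llvm::isa<llvm::ConstantExpr>(Folded);
}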
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b00df27f5fd3..589992c7a7ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1883,20 +1883,24 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return true;
}
+// Match an immediate (if Imm is true) or an SGPR (if Imm is false)
+// offset. If Imm32Only is true, match only 32-bit immediate offsets
+// available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
- SDValue &Offset, bool &Imm) const {
+ SDValue &Offset, bool Imm,
+ bool Imm32Only) const {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
if (!C) {
+ if (Imm)
+ return false;
if (ByteOffsetNode.getValueType().isScalarInteger() &&
ByteOffsetNode.getValueType().getSizeInBits() == 32) {
Offset = ByteOffsetNode;
- Imm = false;
return true;
}
if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
Offset = ByteOffsetNode.getOperand(0);
- Imm = false;
return true;
}
}
@@ -1908,9 +1912,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
int64_t ByteOffset = C->getSExtValue();
Optional<int64_t> EncodedOffset =
AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
- if (EncodedOffset) {
+ if (EncodedOffset && Imm && !Imm32Only) {
Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
- Imm = true;
return true;
}
@@ -1919,7 +1922,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
return false;
EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
- if (EncodedOffset) {
+ if (EncodedOffset && Imm32Only) {
Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
return true;
}
@@ -1927,11 +1930,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
return false;
- SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
- Offset = SDValue(
- CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
+ if (!Imm) {
+ SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
+ Offset = SDValue(
+ CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
+ return true;
+ }
- return true;
+ return false;
}
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
@@ -1959,8 +1965,12 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
Ops), 0);
}
+// Match a base and an immediate (if Imm is true) or an SGPR
+// (if Imm is false) offset. If Imm32Only is true, match only 32-bit
+// immediate offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
- SDValue &Offset, bool &Imm) const {
+ SDValue &Offset, bool Imm,
+ bool Imm32Only) const {
SDLoc SL(Addr);
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -1977,41 +1987,34 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
assert(N0 && N1 && isa<ConstantSDNode>(N1));
}
if (N0 && N1) {
- if (SelectSMRDOffset(N1, Offset, Imm)) {
+ if (SelectSMRDOffset(N1, Offset, Imm, Imm32Only)) {
SBase = Expand32BitAddress(N0);
return true;
}
}
+ return false;
}
+ if (!Imm)
+ return false;
SBase = Expand32BitAddress(Addr);
Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
- Imm = true;
return true;
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
- bool Imm = false;
- return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
+ return SelectSMRD(Addr, SBase, Offset, /* Imm */ true);
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
-
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
-
- bool Imm = false;
- if (!SelectSMRD(Addr, SBase, Offset, Imm))
- return false;
-
- return !Imm && isa<ConstantSDNode>(Offset);
+ return SelectSMRD(Addr, SBase, Offset, /* Imm */ true, /* Imm32Only */ true);
}
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
- bool Imm = false;
- return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
- !isa<ConstantSDNode>(Offset);
+ return SelectSMRD(Addr, SBase, Offset, /* Imm */ false);
}
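Taken together, the three public hooks now differ only in the flag pair they forward to SelectSMRD; restated as a tiny table (hedged, names invented for illustration):

// The (Imm, Imm32Only) combinations behind the three selection hooks:
struct SMRDMode { bool Imm; bool Imm32Only; };
constexpr SMRDMode ModeImm   = {true,  false}; // encoded immediate offset
constexpr SMRDMode ModeImm32 = {true,  true};  // CI-only 32-bit literal offset
constexpr SMRDMode ModeSgpr  = {false, false}; // SGPR register offset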
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 862be9dc5568..7894b8eb5b67 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -193,11 +193,11 @@ private:
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
- bool &Imm) const;
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool Imm,
+ bool Imm32Only) const;
SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
- bool &Imm) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool Imm,
+ bool Imm32Only = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ef7929012597..bf520a560404 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4803,6 +4803,8 @@ AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
case AtomicRMWInst::Nand:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
return AtomicExpansionKind::CmpXChg;
default:
return AtomicExpansionKind::None;
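AtomicExpansionKind::CmpXChg means AtomicExpandPass rewrites the FP min/max RMW into a load plus a compare-exchange retry loop. The same shape in plain C++, as a standalone illustration rather than LLVM code:

#include <atomic>
#include <cmath>

// Plain-C++ analogue of the CmpXChg expansion requested for atomicrmw fmax:
// retry until the computed maximum is exchanged in atomically.
double atomicFMax(std::atomic<double> &Addr, double Val) {
  double Old = Addr.load();
  while (!Addr.compare_exchange_weak(Old, std::fmax(Old, Val))) {
    // compare_exchange_weak refreshed Old with the current value; retry.
  }
  return Old; // atomicrmw yields the previous value
}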
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3f242fdb6d8e..70fae9d784a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1180,7 +1180,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);
if (Arg) {
- const int64_t Value = Arg.getValue().Value.getSExtValue();
+ const int64_t Value = Arg.value().Value.getSExtValue();
if (Value == 0) {
unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
@@ -3235,7 +3235,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
// Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
- return false;
+ return Register();
if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
return Def->getOperand(1).getReg();
@@ -3851,27 +3851,36 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
getAddrModeInfo(*MI, *MRI, AddrInfo);
// FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
- // then we can select all ptr + 32-bit offsets not just immediate offsets.
- if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+ // then we can select all ptr + 32-bit offsets.
+ if (AddrInfo.empty())
return None;
const GEPInfo &GEPInfo = AddrInfo[0];
+ Register PtrReg = GEPInfo.SgprParts[0];
+
// SGPR offset is unsigned.
- if (!GEPInfo.Imm || GEPInfo.Imm < 0 || !isUInt<32>(GEPInfo.Imm))
- return None;
+ if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) &&
+ GEPInfo.Imm != 0) {
+ // If we make it this far we have a load with a 32-bit immediate offset.
+ // It is OK to select this using an SGPR offset, because we have already
+ // failed trying to select this load into one of the _IMM variants since
+ // the _IMM patterns are considered before the _SGPR patterns.
+ Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addImm(GEPInfo.Imm);
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
+ }
- // If we make it this far we have a load with an 32-bit immediate offset.
- // It is OK to select this using a sgpr offset, because we have already
- // failed trying to select this load into one of the _IMM variants since
- // the _IMM Patterns are considered before the _SGPR patterns.
- Register PtrReg = GEPInfo.SgprParts[0];
- Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
- .addImm(GEPInfo.Imm);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
- }};
+ if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) {
+ if (Register OffsetReg =
+ matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) {
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
+ }
+ }
+
+ return None;
}
std::pair<Register, int>
@@ -4231,7 +4240,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
},
[=](MachineInstrBuilder &MIB) { // vaddr
if (FI)
- MIB.addFrameIndex(FI.getValue());
+ MIB.addFrameIndex(FI.value());
else
MIB.addReg(VAddr);
},
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 31012915457b..26e6b9a10688 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -542,63 +542,37 @@ def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val),
}
} // End foreach as
-// TODO: Add GISelPredicateCode for the ret and noret PatFrags once
-// GlobalISelEmitter allows pattern matches where src and dst def count
-// mismatch.
-
-multiclass ret_noret_op {
- let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
- GISelPredicateCode = [{ return true; }] in {
- def "_ret" : PatFrag<(ops node:$ptr, node:$data),
- (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
- }
-
- let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
- GISelPredicateCode = [{ return false; }] in {
- def "_noret" : PatFrag<(ops node:$ptr, node:$data),
- (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
- }
+multiclass noret_op {
+ let HasNoUse = true in
+ def "_noret" : PatFrag<(ops node:$ptr, node:$data),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}
-defm int_amdgcn_flat_atomic_fadd : ret_noret_op;
-defm int_amdgcn_flat_atomic_fadd_v2bf16 : ret_noret_op;
-defm int_amdgcn_flat_atomic_fmin : ret_noret_op;
-defm int_amdgcn_flat_atomic_fmax : ret_noret_op;
-defm int_amdgcn_global_atomic_fadd : ret_noret_op;
-defm int_amdgcn_global_atomic_fadd_v2bf16 : ret_noret_op;
-defm int_amdgcn_global_atomic_fmin : ret_noret_op;
-defm int_amdgcn_global_atomic_fmax : ret_noret_op;
-defm int_amdgcn_ds_fadd_v2bf16 : ret_noret_op;
-
-multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
- let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
- GISelPredicateCode = [{ return false; }] in {
- defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
- }
+defm int_amdgcn_flat_atomic_fadd : noret_op;
+defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
+defm int_amdgcn_flat_atomic_fmin : noret_op;
+defm int_amdgcn_flat_atomic_fmax : noret_op;
+defm int_amdgcn_global_atomic_fadd : noret_op;
+defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
+defm int_amdgcn_global_atomic_fmin : noret_op;
+defm int_amdgcn_global_atomic_fmax : noret_op;
+defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
- let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
- GISelPredicateCode = [{ return true; }] in {
- defm "_ret" : binary_atomic_op<atomic_op, IsInt>;
- }
+multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
+ let HasNoUse = true in
+ defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
}
-multiclass ret_noret_ternary_atomic_op<SDNode atomic_op> {
- let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
- GISelPredicateCode = [{ return false; }] in {
- defm "_noret" : ternary_atomic_op<atomic_op>;
- }
-
- let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
- GISelPredicateCode = [{ return true; }] in {
- defm "_ret" : ternary_atomic_op<atomic_op>;
- }
+multiclass noret_ternary_atomic_op<SDNode atomic_op> {
+ let HasNoUse = true in
+ defm "_noret" : ternary_atomic_op<atomic_op>;
}
multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
- defm "_"#as : ret_noret_binary_atomic_op<atomic_op, IsInt>;
+ defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>;
}
}
}
@@ -640,13 +614,15 @@ def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
-defm atomic_cmp_swap_local : ret_noret_ternary_atomic_op<atomic_cmp_swap>;
-defm atomic_cmp_swap_local_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>;
+defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
+defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
+defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
let AddressSpaces = StoreAddress_region.AddrSpaces in {
-defm atomic_cmp_swap_region : ret_noret_ternary_atomic_op<atomic_cmp_swap>;
-defm atomic_cmp_swap_region_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>;
+defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
+defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
+defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index ed6ddbf426fd..38e04dedd9fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -171,6 +171,10 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
}
void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // FIXME: Enable feature predicate checks once all the tests pass.
+ // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
+ // getSubtargetInfo().getFeatureBits());
+
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
index 1b513c456307..745734aac2b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
@@ -131,8 +131,8 @@ public:
bool IsAOneAddressSpace = isOneAddressSpace(A);
bool IsBOneAddressSpace = isOneAddressSpace(B);
- return AIO.getValue() >= BIO.getValue() &&
- (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
+ return AIO.value() >= BIO.value() &&
+ (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
}
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 77816a783630..6bd906439ee8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -40,9 +40,9 @@ using namespace llvm;
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget
-static cl::opt<bool> DisablePowerSched(
- "amdgpu-disable-power-sched",
- cl::desc("Disable scheduling to minimize mAI power bursts"),
+static cl::opt<bool> EnablePowerSched(
+ "amdgpu-enable-power-sched",
+ cl::desc("Enable scheduling to minimize mAI power bursts"),
cl::init(false));
static cl::opt<bool> EnableVGPRIndexMode(
@@ -916,7 +916,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAGInstrs) override {
const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
- if (!ST.hasMAIInsts() || DisablePowerSched)
+ if (!ST.hasMAIInsts())
return;
DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
@@ -966,7 +966,8 @@ void GCNSubtarget::getPostRAMutations(
std::unique_ptr<ScheduleDAGMutation>
GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
- return std::make_unique<FillMFMAShadowMutation>(&InstrInfo);
+ return EnablePowerSched ? std::make_unique<FillMFMAShadowMutation>(&InstrInfo)
+ : nullptr;
}
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 1c6b9d35695a..971e44723758 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -22,11 +22,13 @@
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
+#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
@@ -278,6 +280,12 @@ static cl::opt<bool>
cl::desc("Enable s_delay_alu insertion"),
cl::init(true), cl::Hidden);
+// Enable GFX11+ VOPD
+static cl::opt<bool>
+ EnableVOPD("amdgpu-enable-vopd",
+ cl::desc("Enable VOPD, dual issue of VALU in wave32"),
+ cl::init(true), cl::Hidden);
+
// Option is used in lit tests to prevent deadcoding of patterns inspected.
static cl::opt<bool>
EnableDCEInRA("amdgpu-dce-in-ra",
@@ -383,6 +391,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIPreAllocateWWMRegsPass(*PR);
initializeSIFormMemoryClausesPass(*PR);
initializeSIPostRABundlerPass(*PR);
+ initializeGCNCreateVOPDPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
initializeAMDGPUExternalAAWrapperPass(*PR);
@@ -920,6 +929,8 @@ public:
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
DAG->addMutation(createIGroupLPDAGMutation());
DAG->addMutation(createSchedBarrierDAGMutation());
+ if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
+ DAG->addMutation(createVOPDPairingMutation());
return DAG;
}
@@ -1399,6 +1410,8 @@ void GCNPassConfig::addPreSched2() {
}
void GCNPassConfig::addPreEmitPass() {
+ if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
+ addPass(&GCNCreateVOPDID);
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a087323e5de7..04dd3e938a15 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1412,10 +1412,12 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> {
foreach RtnMode = ["ret", "noret"] in {
- defvar Op = !cast<SDPatternOperator>(OpPrefix # "_" # RtnMode
+ defvar Op = !cast<SDPatternOperator>(OpPrefix
+ # !if(!eq(RtnMode, "ret"), "", "_noret")
# !if(isIntr, "", "_" # vt.Size));
defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+ let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
def : GCNPat<
(vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vt:$vdata_in)),
(!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
@@ -1428,6 +1430,7 @@ multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isInt
(!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset)
>;
+ } // end let AddedComplexity
} // end foreach RtnMode
}
@@ -1439,10 +1442,12 @@ multiclass BufferAtomicIntrPat<string OpPrefix, ValueType vt, string Inst> {
multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
foreach RtnMode = ["ret", "noret"] in {
- defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global_" # RtnMode
+ defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global"
+ # !if(!eq(RtnMode, "ret"), "", "_noret")
# "_" # vt.Size);
defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+ let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset);
@@ -1465,6 +1470,7 @@ multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst>
!if(!eq(vt, i32), sub0, sub0_sub1)),
Addr64ResDag)
>;
+ } // end let AddedComplexity
} // end foreach RtnMode
}
@@ -1495,13 +1501,14 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
list<string> RtnModes = ["ret", "noret"]> {
foreach RtnMode = RtnModes in {
- defvar Op = !cast<SDPatternOperator>(!if(!eq(RtnMode, "none"),
- OpPrefix, OpPrefix # "_" # RtnMode));
- defvar InstSuffix = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")),
- "_RTN", "");
- defvar CachePolicy = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")),
+ defvar Op = !cast<SDPatternOperator>(OpPrefix
+ # !if(!eq(RtnMode, "ret"), "", "_noret"));
+
+ defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+ defvar CachePolicy = !if(!eq(RtnMode, "ret"),
(set_glc $cachepolicy), (timm:$cachepolicy));
+ let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
def : GCNPat<
(vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
timm:$offset, timm:$cachepolicy, 0)),
@@ -1534,6 +1541,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
>;
+ } // end let AddedComplexity
} // end foreach RtnMode
}
@@ -1551,7 +1559,7 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i32, "BUFFER_ATOMIC_OR">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i32, "BUFFER_ATOMIC_XOR">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i32, "BUFFER_ATOMIC_INC">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i32, "BUFFER_ATOMIC_DEC">;
-defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["none"]>;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["ret"]>;
defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i64, "BUFFER_ATOMIC_SWAP_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i64, "BUFFER_ATOMIC_ADD_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i64, "BUFFER_ATOMIC_SUB_X2">;
@@ -1643,7 +1651,8 @@ let SubtargetPredicate = isGFX90APlus in {
foreach RtnMode = ["ret", "noret"] in {
-defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap # "_" # RtnMode);
+defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap
+ # !if(!eq(RtnMode, "ret"), "", "_noret"));
defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy),
(timm:$cachepolicy));
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 27b723875aa4..d8387bf6f1ae 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -950,10 +950,11 @@ defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
} // End AddedComplexity = 100
-class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
- (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
->;
+class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
+ bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
+ (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> {
+ let AddedComplexity = complexity;
+}
multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
@@ -965,75 +966,88 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
}
- def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>;
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
}
multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
def : DSAtomicRetPat<inst, vt,
- !cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
def : DSAtomicRetPat<noRetInst, vt,
- !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size), /* complexity */ 1>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_"#vt.Size)>;
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
}
def : DSAtomicRetPat<inst, vt,
- !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
+ !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
def : DSAtomicRetPat<noRetInst, vt,
- !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
+ !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
}
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
-class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
+class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
+ int complexity = 0, bit gds=0> : GCNPat<
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
- (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))
->;
+ (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))> {
+ let AddedComplexity = complexity;
+}
multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>;
- def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>;
+ def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
+ def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size),
+ /* complexity */ 1>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_"#vt.Size)>;
def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_noret_"#vt.Size),
+ /* complexity */ 1>;
}
- def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
- def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
+ def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
}
} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
let SubtargetPredicate = isGFX11Plus in {
// The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
-class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
+class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
+ int complexity = 0, bit gds=0> : GCNPat<
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
- (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))
->;
+ (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))> {
+ let AddedComplexity = complexity;
+}
multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> {
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_"#vt.Size)>;
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
+ !cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
- def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
}
} // End SubtargetPredicate = isGFX11Plus
@@ -1090,17 +1104,20 @@ defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp
} // End SubtargetPredicate = isGFX11Plus
let SubtargetPredicate = isGFX90APlus in {
-def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_ret_64>;
+def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>;
+let AddedComplexity = 1 in
def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_64>;
}
let SubtargetPredicate = isGFX940Plus in {
-def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_ret_32>;
+def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_32>;
+let AddedComplexity = 1 in
def : DSAtomicRetPat<DS_PK_ADD_F16, v2f16, atomic_load_fadd_v2f16_local_noret_32>;
def : GCNPat <
- (v2i16 (int_amdgcn_ds_fadd_v2bf16_ret i32:$ptr, v2i16:$src)),
+ (v2i16 (int_amdgcn_ds_fadd_v2bf16 i32:$ptr, v2i16:$src)),
(DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
>;
+let AddedComplexity = 1 in
def : GCNPat <
(v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
(DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index cb2822818549..c634e15945ad 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1015,31 +1015,35 @@ class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt
multiclass FlatAtomicPat <string inst, string node, ValueType vt,
ValueType data_vt = vt> {
- defvar rtnNode = !cast<PatFrags>(node#"_ret_"#vt.Size);
+ defvar rtnNode = !cast<PatFrags>(node#"_"#vt.Size);
defvar noRtnNode = !cast<PatFrags>(node#"_noret_"#vt.Size);
def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
(!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+ let AddedComplexity = 1 in
def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
}
multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
- ValueType data_vt = vt, bit isIntr = 0> {
- defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size));
+ ValueType data_vt = vt, int complexity = 0,
+ bit isIntr = 0> {
+ defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size));
defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));
+ let AddedComplexity = complexity in
def : GCNPat <(vt (rtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
(!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+ let AddedComplexity = !add(complexity, 1) in
def : GCNPat <(vt (noRtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
}
multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt,
ValueType data_vt = vt> {
- defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* isIntr */ 1>;
+ defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>;
}
class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -1260,17 +1264,16 @@ multiclass GlobalFLATAtomicPatsRtn<string nortn_inst_name, SDPatternOperator nod
multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
ValueType data_vt = vt, bit isIntr = 0> {
- defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size));
+ defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size));
defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));
- let AddedComplexity = 10 in {
- defm : FlatSignedAtomicPat <inst, node, vt, data_vt, isIntr>;
- }
+ defm : FlatSignedAtomicPat <inst, node, vt, data_vt, /* complexity */ 10, isIntr>;
- let AddedComplexity = 11 in {
- def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), noRtnNode, vt, data_vt>;
- def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>;
- }
+ let AddedComplexity = 13 in
+ def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), noRtnNode, vt, data_vt>;
+
+ let AddedComplexity = 12 in
+ def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>;
}
multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
new file mode 100644
index 000000000000..83dc3bebf4d3
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -0,0 +1,175 @@
+//===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Combine VALU pairs into VOPD instructions. This only works on wave32.
+/// VOPD has register constraints, so we reject the combination when those
+/// constraints are not met.
+/// The shouldCombineVOPD mutator in the post-RA machine scheduler puts
+/// candidate instructions for VOPD back-to-back.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "GCNVOPDUtils.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include <utility>
+
+#define DEBUG_TYPE "gcn-create-vopd"
+STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created.");
+
+using namespace llvm;
+
+namespace {
+
+class GCNCreateVOPD : public MachineFunctionPass {
+private:
+public:
+ static char ID;
+ const GCNSubtarget *ST = nullptr;
+
+ GCNCreateVOPD() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "GCN Create VOPD Instructions";
+ }
+
+ bool doReplace(const SIInstrInfo *SII,
+ std::pair<MachineInstr *, MachineInstr *> &Pair) {
+ auto *FirstMI = Pair.first;
+ auto *SecondMI = Pair.second;
+ unsigned Opc1 = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+ AMDGPU::getVOPDOpcode(Opc2));
+ assert(NewOpcode != -1 &&
+ "Should have previously determined this as a possible VOPD\n");
+
+ auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
+ FirstMI->getDebugLoc(), SII->get(NewOpcode))
+ .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+ VOPDInst.add(FirstMI->getOperand(0))
+ .add(SecondMI->getOperand(0))
+ .add(FirstMI->getOperand(1));
+
+ switch (Opc1) {
+ case AMDGPU::V_MOV_B32_e32:
+ break;
+ case AMDGPU::V_FMAMK_F32:
+ case AMDGPU::V_FMAAK_F32:
+ VOPDInst.add(FirstMI->getOperand(2));
+ VOPDInst.add(FirstMI->getOperand(3));
+ break;
+ default:
+ VOPDInst.add(FirstMI->getOperand(2));
+ break;
+ }
+
+ VOPDInst.add(SecondMI->getOperand(1));
+
+ switch (Opc2) {
+ case AMDGPU::V_MOV_B32_e32:
+ break;
+ case AMDGPU::V_FMAMK_F32:
+ case AMDGPU::V_FMAAK_F32:
+ VOPDInst.add(SecondMI->getOperand(2));
+ VOPDInst.add(SecondMI->getOperand(3));
+ break;
+ default:
+ VOPDInst.add(SecondMI->getOperand(2));
+ break;
+ }
+
+ VOPDInst.copyImplicitOps(*FirstMI);
+ VOPDInst.copyImplicitOps(*SecondMI);
+
+ LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+ << *Pair.first << "\tY: " << *Pair.second << "\n");
+ FirstMI->eraseFromParent();
+ SecondMI->eraseFromParent();
+ ++NumVOPDCreated;
+ return true;
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
+ return false;
+ LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
+
+ const SIInstrInfo *SII = ST->getInstrInfo();
+ bool Changed = false;
+
+ SmallVector<std::pair<MachineInstr *, MachineInstr *>> ReplaceCandidates;
+
+ for (auto &MBB : MF) {
+ auto MII = MBB.begin(), E = MBB.end();
+ while (MII != E) {
+ auto *FirstMI = &*MII;
+ MII = next_nodbg(MII, MBB.end());
+ if (MII == MBB.end())
+ break;
+ if (FirstMI->isDebugInstr())
+ continue;
+ auto *SecondMI = &*MII;
+ unsigned Opc = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+ llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+ std::pair<MachineInstr *, MachineInstr *> Pair;
+
+ if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+ Pair = {FirstMI, SecondMI};
+ else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+ Pair = {SecondMI, FirstMI};
+ else
+ continue;
+ // checkVOPDRegConstraints cares about program order, but doReplace
+ // cares about the X-Y order within the resulting VOPD.
+ if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+ ReplaceCandidates.push_back(Pair);
+ ++MII;
+ }
+ }
+ }
+ for (auto &Pair : ReplaceCandidates) {
+ Changed |= doReplace(SII, Pair);
+ }
+
+ return Changed;
+ }
+};
+
+} // namespace
+
+char GCNCreateVOPD::ID = 0;
+
+char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID;
+
+INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions",
+ false, false)
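
A minimal standalone sketch of the greedy adjacent-pair scan that runOnMachineFunction performs above. The Inst struct and canPair helper are made-up stand-ins for AMDGPU::getCanBeVOPD and checkVOPDRegConstraints, not the real LLVM APIs:

// Standalone sketch of the greedy adjacent-pair scan in GCNCreateVOPD.
#include <cstdio>
#include <utility>
#include <vector>

struct Inst { int Id; bool X, Y; };          // X/Y: legal as VOPD X or Y half

static bool canPair(const Inst &A, const Inst &B) {
  // The real code also checks VGPR banks, literals, and the scalar bus;
  // here we only require that the two halves fill different VOPD slots.
  return (A.X && B.Y) || (A.Y && B.X);
}

int main() {
  std::vector<Inst> Block = {{0, true, false}, {1, false, true},
                             {2, true, true},  {3, true, false}};
  std::vector<std::pair<int, int>> Pairs;
  for (size_t I = 0; I + 1 < Block.size(); ++I) {
    const Inst &A = Block[I], &B = Block[I + 1];
    if (!canPair(A, B))
      continue;
    // Orient the pair so the first element is the X half, as doReplace expects.
    Pairs.push_back(A.X && B.Y ? std::make_pair(A.Id, B.Id)
                               : std::make_pair(B.Id, A.Id));
    ++I; // consume both instructions, mirroring the extra ++MII in the pass
  }
  for (auto &P : Pairs)
    std::printf("pair X=%d Y=%d\n", P.first, P.second);
}

As in the pass, a successful match consumes both instructions, so no instruction ever lands in two pairs.
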
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 1cd880eaa48e..5d254518c67a 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -143,13 +143,20 @@ bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
}
int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
- auto DPP32 = AMDGPU::getDPPOp32(Op);
+ int DPP32 = AMDGPU::getDPPOp32(Op);
if (IsShrinkable) {
assert(DPP32 == -1);
- auto E32 = AMDGPU::getVOPe32(Op);
+ int E32 = AMDGPU::getVOPe32(Op);
DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
}
- return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
+ if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)
+ return DPP32;
+ int DPP64 = -1;
+ if (ST->hasVOP3DPP())
+ DPP64 = AMDGPU::getDPPOp64(Op);
+ if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)
+ return DPP64;
+ return -1;
}
// tracks the register operand definition and returns:
@@ -188,6 +195,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
+ bool HasVOP3DPP = ST->hasVOP3DPP();
auto OrigOp = OrigMI.getOpcode();
auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
if (DPPOp == -1) {
@@ -201,10 +209,18 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
bool Fail = false;
do {
- auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
- assert(Dst);
- DPPInst.add(*Dst);
- int NumOperands = 1;
+ int NumOperands = 0;
+ if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
+ DPPInst.add(*Dst);
+ ++NumOperands;
+ }
+ if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
+ if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
+ DPPInst.add(*SDst);
+ ++NumOperands;
+ }
+ // If we shrunk a 64-bit VOP3b to 32 bits, just ignore the sdst.
+ }
const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
if (OldIdx != -1) {
@@ -230,7 +246,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
AMDGPU::OpName::src0_modifiers)) {
assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
AMDGPU::OpName::src0_modifiers));
- assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ assert(HasVOP3DPP ||
+ (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
DPPInst.addImm(Mod0->getImm());
++NumOperands;
} else if (AMDGPU::getNamedOperandIdx(DPPOp,
@@ -253,7 +270,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
AMDGPU::OpName::src1_modifiers)) {
assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
AMDGPU::OpName::src1_modifiers));
- assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ assert(HasVOP3DPP ||
+ (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
DPPInst.addImm(Mod1->getImm());
++NumOperands;
} else if (AMDGPU::getNamedOperandIdx(DPPOp,
@@ -261,7 +279,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.addImm(0);
++NumOperands;
}
- if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+ if (Src1) {
if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
Fail = true;
@@ -270,8 +289,17 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.add(*Src1);
++NumOperands;
}
-
- if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
+ if (auto *Mod2 =
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
+ assert(NumOperands ==
+ AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));
+ assert(HasVOP3DPP ||
+ (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
+ DPPInst.addImm(Mod2->getImm());
+ ++NumOperands;
+ }
+ auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
+ if (Src2) {
if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
@@ -279,8 +307,62 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
break;
}
DPPInst.add(*Src2);
+ ++NumOperands;
+ }
+ if (HasVOP3DPP) {
+ auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
+ if (ClampOpr &&
+ AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) {
+ DPPInst.addImm(ClampOpr->getImm());
+ }
+ auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
+ if (VdstInOpr &&
+ AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) {
+ DPPInst.add(*VdstInOpr);
+ }
+ auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
+ if (OmodOpr &&
+ AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) {
+ DPPInst.addImm(OmodOpr->getImm());
+ }
+ // Validate that OP_SEL is all zeros and OP_SEL_HI is all ones.
+ if (auto *OpSelOpr =
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
+ auto OpSel = OpSelOpr->getImm();
+ if (OpSel != 0) {
+ LLVM_DEBUG(dbgs() << " failed: op_sel must be zero\n");
+ Fail = true;
+ break;
+ }
+ if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1)
+ DPPInst.addImm(OpSel);
+ }
+ if (auto *OpSelHiOpr =
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
+ auto OpSelHi = OpSelHiOpr->getImm();
+ // Only VOP3P has op_sel_hi, and all VOP3P instructions have three
+ // operands, so check that all three op_sel_hi bits are set.
+ assert(Src2 && "Expected vop3p with 3 operands");
+ if (OpSelHi != 7) {
+ LLVM_DEBUG(dbgs() << " failed: op_sel_hi must be all set to one\n");
+ Fail = true;
+ break;
+ }
+ if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1)
+ DPPInst.addImm(OpSelHi);
+ }
+ auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
+ if (NegOpr &&
+ AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) {
+ DPPInst.addImm(NegOpr->getImm());
+ }
+ auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
+ if (NegHiOpr &&
+ AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) {
+ DPPInst.addImm(NegHiOpr->getImm());
+ }
}
-
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
@@ -531,8 +613,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
}
bool IsShrinkable = isShrinkable(OrigMI);
- if (!(IsShrinkable || TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
- LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
+ if (!(IsShrinkable ||
+ ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||
+ TII->isVOP3(OrigOp)) &&
+ ST->hasVOP3DPP()) ||
+ TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
+ LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3/3P/C\n");
+ break;
+ }
+ if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) {
+ LLVM_DEBUG(dbgs() << " failed: can't combine v_cmpx\n");
break;
}
@@ -543,9 +633,12 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
break;
}
+ auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
assert(Src0 && "Src1 without Src0?");
- if (Src1 && Src1->isIdenticalTo(*Src0)) {
- assert(Src1->isReg());
+ if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
+ (Src2 && Src2->isIdenticalTo(*Src0)))) ||
+ (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
+ (Src2 && Src2->isIdenticalTo(*Src1))))) {
LLVM_DEBUG(
dbgs()
<< " " << OrigMI
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
new file mode 100644
index 000000000000..a5008e39d91a
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -0,0 +1,212 @@
+//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the AMDGPU DAG scheduling
+/// mutation to pair VOPD instructions back to back. It also contains
+/// subroutines useful in the creation of VOPD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNVOPDUtils.h"
+#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gcn-vopd-utils"
+
+bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
+ const MachineInstr &FirstMI,
+ const MachineInstr &SecondMI) {
+ const MachineFunction *MF = FirstMI.getMF();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const unsigned NumVGPRBanks = 4;
+ // Literals also count against scalar bus limit
+ SmallVector<const MachineOperand *> UniqueLiterals;
+ auto addLiteral = [&](const MachineOperand &Op) {
+ for (auto &Literal : UniqueLiterals) {
+ if (Literal->isIdenticalTo(Op))
+ return;
+ }
+ UniqueLiterals.push_back(&Op);
+ };
+ SmallVector<Register> UniqueScalarRegs;
+ assert([&]() -> bool {
+ for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
+ MII != FirstMI.getParent()->instr_end(); ++MII) {
+ if (&*MII == &SecondMI)
+ return true;
+ }
+ return false;
+ }() && "Expected FirstMI to precede SecondMI");
+ // Cannot pair dependent instructions
+ for (const auto &Use : SecondMI.uses())
+ if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg()))
+ return false;
+
+ struct ComponentInfo {
+ ComponentInfo(const MachineInstr &MI) : MI(MI) {}
+ Register Dst, Reg0, Reg1, Reg2;
+ const MachineInstr &MI;
+ };
+ ComponentInfo CInfo[] = {ComponentInfo(FirstMI), ComponentInfo(SecondMI)};
+
+ for (ComponentInfo &Comp : CInfo) {
+ switch (Comp.MI.getOpcode()) {
+ case AMDGPU::V_FMAMK_F32:
+ // cannot inline the fixed literal in fmamk
+ addLiteral(Comp.MI.getOperand(2));
+ Comp.Reg2 = Comp.MI.getOperand(3).getReg();
+ break;
+ case AMDGPU::V_FMAAK_F32:
+ // cannot inline the fixed literal in fmaak
+ addLiteral(Comp.MI.getOperand(3));
+ Comp.Reg1 = Comp.MI.getOperand(2).getReg();
+ break;
+ case AMDGPU::V_FMAC_F32_e32:
+ case AMDGPU::V_DOT2_F32_F16:
+ case AMDGPU::V_DOT2_F32_BF16:
+ Comp.Reg1 = Comp.MI.getOperand(2).getReg();
+ Comp.Reg2 = Comp.MI.getOperand(0).getReg();
+ break;
+ case AMDGPU::V_CNDMASK_B32_e32:
+ UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
+ Comp.Reg1 = Comp.MI.getOperand(2).getReg();
+ break;
+ case AMDGPU::V_MOV_B32_e32:
+ break;
+ default:
+ Comp.Reg1 = Comp.MI.getOperand(2).getReg();
+ break;
+ }
+
+ Comp.Dst = Comp.MI.getOperand(0).getReg();
+
+ const MachineOperand &Op0 = Comp.MI.getOperand(1);
+ if (Op0.isReg()) {
+ if (!TRI->isVectorRegister(MRI, Op0.getReg())) {
+ if (!is_contained(UniqueScalarRegs, Op0.getReg()))
+ UniqueScalarRegs.push_back(Op0.getReg());
+ } else
+ Comp.Reg0 = Op0.getReg();
+ } else {
+ if (!TII.isInlineConstant(Comp.MI, 1))
+ addLiteral(Op0);
+ }
+ }
+
+ if (UniqueLiterals.size() > 1)
+ return false;
+ if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
+ return false;
+
+ // check port 0
+ if (CInfo[0].Reg0 && CInfo[1].Reg0 &&
+ CInfo[0].Reg0 % NumVGPRBanks == CInfo[1].Reg0 % NumVGPRBanks)
+ return false;
+ // check port 1
+ if (CInfo[0].Reg1 && CInfo[1].Reg1 &&
+ CInfo[0].Reg1 % NumVGPRBanks == CInfo[1].Reg1 % NumVGPRBanks)
+ return false;
+ // check port 2
+ if (CInfo[0].Reg2 && CInfo[1].Reg2 &&
+ !((CInfo[0].Reg2 ^ CInfo[1].Reg2) & 0x1))
+ return false;
+ if (!((CInfo[0].Dst ^ CInfo[1].Dst) & 0x1))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
+ << "\n\tY: " << SecondMI << "\n");
+ return true;
+}
+
+/// Check if the instruction pair, FirstMI and SecondMI, should be scheduled
+/// together. If FirstMI is unspecified, check whether SecondMI can be part
+/// of any fused pair at all.
+static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
+ unsigned Opc2 = SecondMI.getOpcode();
+ auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+
+ // One instruction case
+ if (!FirstMI)
+ return SecondCanBeVOPD.Y;
+
+ unsigned Opc = FirstMI->getOpcode();
+ auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+
+ if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
+ (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
+ return false;
+
+ return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
+}
+
+/// Adapts the design from MacroFusion.
+/// Puts valid candidate instructions back-to-back so they can easily
+/// be turned into VOPD instructions.
+/// Greedily pairs instruction candidates; an O(n^2) algorithm.
+struct VOPDPairingMutation : ScheduleDAGMutation {
+ ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
+
+ VOPDPairingMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
+ : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
+
+ void apply(ScheduleDAGInstrs *DAG) override {
+ const TargetInstrInfo &TII = *DAG->TII;
+ const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
+ if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
+ LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
+ return;
+ }
+
+ std::vector<SUnit>::iterator ISUI, JSUI;
+ for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
+ const MachineInstr *IMI = ISUI->getInstr();
+ if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
+ continue;
+ if (!hasLessThanNumFused(*ISUI, 2))
+ continue;
+
+ for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
+ if (JSUI->isBoundaryNode())
+ continue;
+ const MachineInstr *JMI = JSUI->getInstr();
+ if (!hasLessThanNumFused(*JSUI, 2) ||
+ !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
+ continue;
+ if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
+ break;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
+ }
+};
+
+std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
+ return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
+}
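
To make the port checks in checkVOPDRegConstraints concrete, here is a standalone sketch of the bank-legality arithmetic. It makes the simplifying assumption that plain VGPR indices stand in for LLVM Register values and that 0 means "operand absent" (mirroring how a default-constructed Register tests false); the literal and scalar-bus counting is left out:

// Standalone sketch of the VOPD VGPR-bank legality checks.
#include <cstdio>

struct Ports { unsigned Dst, Reg0, Reg1, Reg2; }; // one VOPD half

static bool banksCompatible(const Ports &X, const Ports &Y) {
  const unsigned NumVGPRBanks = 4;
  // Source ports 0 and 1: the two halves may not read the same 4-wide bank.
  if (X.Reg0 && Y.Reg0 && X.Reg0 % NumVGPRBanks == Y.Reg0 % NumVGPRBanks)
    return false;
  if (X.Reg1 && Y.Reg1 && X.Reg1 % NumVGPRBanks == Y.Reg1 % NumVGPRBanks)
    return false;
  // Source port 2 and the two destinations: even/odd parity must differ.
  if (X.Reg2 && Y.Reg2 && !((X.Reg2 ^ Y.Reg2) & 1))
    return false;
  if (!((X.Dst ^ Y.Dst) & 1))
    return false;
  return true;
}

int main() {
  Ports A{/*Dst=*/2, /*Reg0=*/1, 0, 0};       // X half reads v1, writes v2
  Ports B{/*Dst=*/3, /*Reg0=*/6, 0, 0};       // v1 vs v6: banks 1 vs 2, OK
  Ports C{/*Dst=*/3, /*Reg0=*/5, 0, 0};       // v1 vs v5: both bank 1, clash
  std::printf("A/B legal: %d\n", banksCompatible(A, B)); // 1
  std::printf("A/C legal: %d\n", banksCompatible(A, C)); // 0
}

Source ports 0 and 1 are constrained by the four VGPR banks (index mod 4), while port 2 and the destinations only need opposite even/odd parity, which is what the XOR-with-1 tests encode.
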
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h
new file mode 100644
index 000000000000..22361b9a1a07
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h
@@ -0,0 +1,32 @@
+//===- GCNVOPDUtils.h - GCN VOPD Utils ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the AMDGPU DAG scheduling
+/// mutation to pair VOPD instructions back to back. It also contains
+/// subroutines useful in the creation of VOPD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_VOPDUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_VOPDUTILS_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+class SIInstrInfo;
+
+bool checkVOPDRegConstraints(const SIInstrInfo &TII,
+ const MachineInstr &FirstMI,
+ const MachineInstr &SecondMI);
+
+std::unique_ptr<ScheduleDAGMutation> createVOPDPairingMutation();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_VOPDUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 02c213f90f89..228963ff2a20 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -62,12 +62,6 @@ public:
virtual void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const = 0;
-
-protected:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 11fe3f9ef058..fba4b1a3db66 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -36,6 +36,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AMDGPUGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 060d4b660632..c2e2563c3989 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -50,6 +50,7 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "AMDGPUGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 78eb304fe84f..3d926e52c368 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -58,11 +58,6 @@ private:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
-
};
} // end anonymous namespace
@@ -90,11 +85,8 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
}
void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (MI.getOpcode() == R600::RETURN ||
MI.getOpcode() == R600::FETCH_CLAUSE ||
@@ -187,5 +179,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
return MO.getImm();
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "R600GenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
index 269209a12175..b9ff195e0ddc 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "R600MCTargetDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "R600GenInstrInfo.inc"
MCInstrInfo *llvm::createR600MCInstrInfo() {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
index 605ae851378d..b4ce748532f8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
@@ -35,6 +35,7 @@ MCInstrInfo *createR600MCInstrInfo();
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "R600GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 5e67fb5ec876..e093d78b2cc6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -310,11 +310,8 @@ uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const {
}
void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
int Opcode = MI.getOpcode();
APInt Encoding, Scratch;
getBinaryCodeForInstr(MI, Fixups, Encoding, Scratch, STI);
@@ -574,5 +571,4 @@ void SIMCCodeEmitter::getMachineOpValueCommon(
llvm_unreachable("Encoding of this operand type is not supported yet.");
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index bf52f7830ad7..5199a37a0519 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1623,7 +1623,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
NewBldVec);
}
-SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
+SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
SelectionDAG &DAG,
const SDLoc &DL) const {
// Old -> New swizzle values
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 1e75a0432ec3..e7706fa0ef5c 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -74,8 +74,8 @@ private:
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
- SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG,
- const SDLoc &DL) const;
+ SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
+ SelectionDAG &DAG, const SDLoc &DL) const;
SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const;
SDValue lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
index 8f7807a2b472..f81f5122bbc9 100644
--- a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
@@ -13,6 +13,7 @@
//
#include "AMDGPUMCInstLower.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600AsmPrinter.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -42,6 +43,9 @@ void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
}
void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ R600_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
R600MCInstLower MCInstLowering(OutContext, STI, *this);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 094d5cd58673..d16da2a8b86b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -352,7 +352,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// TODO: Generalize to more vector types.
setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
{MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
- MVT::v4i16, MVT::v4f16, MVT::v16i16, MVT::v16f16},
+ MVT::v4i16, MVT::v4f16},
Custom);
// Deal with vec3 vector operations when widened to vec4.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 814a7c446889..799d34e32d27 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3335,15 +3335,18 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&DefMI, &MBB, this]() -> void {
+ const auto killDef = [&]() -> void {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// The only user is the instruction which will be killed.
- if (!MRI.hasOneNonDBGUse(DefMI->getOperand(0).getReg()))
+ Register DefReg = DefMI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(DefReg))
return;
// We cannot just remove the DefMI here; the calling pass will crash.
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
DefMI->removeOperand(I);
+ if (LV)
+ LV->getVarInfo(DefReg).AliveBlocks.clear();
};
int64_t Imm;
@@ -3982,6 +3985,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+ int Src3Idx = -1;
+ if (Src0Idx == -1) {
+ // VOPD V_DUAL_* instructions use different operand names.
+ Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
+ Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
+ Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
+ Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
+ }
// Make sure the number of operands is correct.
const MCInstrDesc &Desc = get(Opcode);
@@ -4255,9 +4266,9 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// Only look at the true operands. Only a real operand can use the constant
// bus, and we don't want to check pseudo-operands like the source modifier
// flags.
- for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {
+ for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
if (OpIdx == -1)
- break;
+ continue;
const MachineOperand &MO = MI.getOperand(OpIdx);
if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
if (MO.isReg()) {
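
The verifier loop above switches from break to continue because VOPD duals are looked up through different operand names (src0X, vsrc1X, src0Y, vsrc1Y) and any single index may now be -1 while later ones are still valid. A toy illustration, with made-up operand indices:

// Sketch of scanning possibly-absent operand indices with `continue`.
#include <cstdio>

int main() {
  int SrcIdx[] = {3, -1, 4, 6};   // e.g. vsrc1X absent, src0Y/vsrc1Y present
  int Checked = 0;
  for (int OpIdx : SrcIdx) {
    if (OpIdx == -1)
      continue;                   // `break` would skip src0Y and vsrc1Y here
    ++Checked;                    // stands in for the usesConstantBus() check
  }
  std::printf("checked %d operands\n", Checked); // 3
}
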
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 311f9f68e675..1b411eb83eb3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1242,6 +1242,9 @@ namespace AMDGPU {
int getDPPOp32(uint16_t Opcode);
LLVM_READONLY
+ int getDPPOp64(uint16_t Opcode);
+
+ LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);
LLVM_READONLY
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 29ee9f12b12d..23afd6556bc9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -193,43 +193,32 @@ def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
-multiclass SDBufferAtomicRetNoRet {
- def "_ret" : PatFrag<
- (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
- node:$offset, node:$cachepolicy, node:$idxen),
- (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
- node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
- node:$idxen)> {
- let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
- let GISelPredicateCode = [{ return true; }];
- }
-
+multiclass SDBufferAtomicNoRet {
def "_noret" : PatFrag<
(ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
node:$offset, node:$cachepolicy, node:$idxen),
(!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
node:$idxen)> {
- let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
- let GISelPredicateCode = [{ return false; }];
+ let HasNoUse = true;
}
}
-defm SIbuffer_atomic_swap : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_add : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_sub : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_smin : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_umin : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_smax : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_umax : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_and : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_or : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_xor : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_inc : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_dec : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_fadd : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_fmin : SDBufferAtomicRetNoRet;
-defm SIbuffer_atomic_fmax : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_swap : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_add : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_sub : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_smin : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_umin : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_smax : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_umax : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_and : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_or : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_xor : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_inc : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_dec : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet;
+defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet;
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
SDTypeProfile<1, 9,
@@ -246,24 +235,13 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
-def SIbuffer_atomic_cmpswap_ret : PatFrag<
- (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
- node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
- (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
- node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
- node:$idxen)> {
- let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
- let GISelPredicateCode = [{ return true; }];
-}
-
def SIbuffer_atomic_cmpswap_noret : PatFrag<
(ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
(SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
node:$idxen)> {
- let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
- let GISelPredicateCode = [{ return false; }];
+ let HasNoUse = true;
}
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
@@ -774,13 +752,13 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
- defm _local_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
+ defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
IsInt>;
}
let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
- defm _region_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
+ defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
IsInt>;
}
}
@@ -2194,21 +2172,21 @@ class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp,
"$sdst",
"$vdst"),
""); // use $sdst for VOPC
- string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
- string isrc1 = !if(!eq(NumSrcArgs, 1), "",
- !if(!eq(NumSrcArgs, 2), " $src1",
- " $src1,"));
- string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+ string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+ string src1nomods = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1",
+ " $src1,"));
+ string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");
- string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
- string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
- !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
- " $src1_modifiers,"));
- string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+ string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+ string src1mods = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
- string src0 = !if(Src0HasMods, fsrc0, isrc0);
- string src1 = !if(Src1HasMods, fsrc1, isrc1);
- string src2 = !if(Src2HasMods, fsrc2, isrc2);
+ string src0 = !if(Src0HasMods, src0mods, src0nomods);
+ string src1 = !if(Src1HasMods, src1mods, src1nomods);
+ string src2 = !if(Src2HasMods, src2mods, src2nomods);
string opsel = !if(HasOpSel, "$op_sel", "");
string 3PMods = !if(IsVOP3P,
!if(HasOpSel, "$op_sel_hi", "")
@@ -2559,8 +2537,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
// the asm operand name via this HasModifiers flag
field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp,
- HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods,
- HasSrc2FloatMods, DstVT >.ret;
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
+ HasModifiers, DstVT>.ret;
field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret;
field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret;
field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
@@ -2800,6 +2778,14 @@ def getDPPOp32 : InstrMapping {
let ValueCols = [["DPP"]];
}
+def getDPPOp64 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["VOP3"];
+ let ValueCols = [["VOP3_DPP"]];
+}
+
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "Commutable_REV";
@@ -2961,6 +2947,27 @@ def getVCMPXOpFromVCMP : InstrMapping {
let ValueCols = [["1"]];
}
+def VOPDComponentTable : GenericTable {
+ let FilterClass = "VOPD_Component";
+ let CppTypeName = "VOPDComponentInfo";
+ let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
+ let PrimaryKey = ["BaseVOP"];
+ let PrimaryKeyName = "getVOPDComponentHelper";
+}
+
+def VOPDPairs : GenericTable {
+ let FilterClass = "VOPD_Base";
+ let CppTypeName = "VOPDInfo";
+ let Fields = ["Opcode", "OpX", "OpY"];
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getVOPDOpcodeHelper";
+}
+
+def getVOPDInfoFromComponentOpcodes : SearchIndex {
+ let Table = VOPDPairs;
+ let Key = ["OpX", "OpY"];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 829669157893..ce8c03bb8d64 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1449,6 +1449,14 @@ def : BitConvert <v8i32, v16f16, VReg_256>;
def : BitConvert <v8i32, v16i16, VReg_256>;
def : BitConvert <v8f32, v16f16, VReg_256>;
def : BitConvert <v8f32, v16i16, VReg_256>;
+def : BitConvert <v16f16, v4i64, VReg_256>;
+def : BitConvert <v16i16, v4i64, VReg_256>;
+def : BitConvert <v16f16, v4f64, VReg_256>;
+def : BitConvert <v16i16, v4f64, VReg_256>;
+def : BitConvert <v4i64, v16f16, VReg_256>;
+def : BitConvert <v4i64, v16i16, VReg_256>;
+def : BitConvert <v4f64, v16f16, VReg_256>;
+def : BitConvert <v4f64, v16i16, VReg_256>;
// 512-bit bitcast
def : BitConvert <v16i32, v16f32, VReg_512>;
@@ -3012,6 +3020,35 @@ multiclass Int16Med3Pat<Instruction med3Inst,
def : FPMed3Pat<f32, V_MED3_F32_e64>;
+class
+IntMinMaxPat<Instruction minmaxInst, SDPatternOperator min_or_max,
+ SDPatternOperator max_or_min_oneuse> : AMDGPUPat <
+ (DivergentBinFrag<min_or_max> (max_or_min_oneuse i32:$src0, i32:$src1),
+ i32:$src2),
+ (minmaxInst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
+>;
+
+class
+FPMinMaxPat<Instruction minmaxInst, ValueType vt, SDPatternOperator min_or_max,
+ SDPatternOperator max_or_min_oneuse> : GCNPat <
+ (min_or_max (max_or_min_oneuse (VOP3Mods vt:$src0, i32:$src0_mods),
+ (VOP3Mods vt:$src1, i32:$src1_mods)),
+ (vt (VOP3Mods vt:$src2, i32:$src2_mods))),
+ (minmaxInst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+let OtherPredicates = [isGFX11Plus] in {
+def : IntMinMaxPat<V_MAXMIN_I32_e64, smin, smax_oneuse>;
+def : IntMinMaxPat<V_MINMAX_I32_e64, smax, smin_oneuse>;
+def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>;
+def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>;
+def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
+def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+}
+
let OtherPredicates = [isGFX9Plus] in {
def : FP16Med3Pat<f16, V_MED3_F16_e64>;
defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, smax_oneuse, smin_oneuse>;
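
Assuming the GFX11 instruction names encode the operation order (V_MAXMIN applies max first, then min), the IntMinMaxPat and FPMinMaxPat definitions above fuse min(max(a, b), c) and max(min(a, b), c) into single instructions. A scalar model of the two fused shapes:

// Scalar model of the fused min/max operations matched above.
#include <algorithm>
#include <cstdio>

static int maxmin(int A, int B, int C) { return std::min(std::max(A, B), C); }
static int minmax(int A, int B, int C) { return std::max(std::min(A, B), C); }

int main() {
  // When B <= C, maxmin(A, B, C) clamps A into [B, C].
  std::printf("%d %d\n", maxmin(7, 0, 5), minmax(7, 10, 5)); // 5 7
}

The _oneuse predicates on the inner node keep the fusion from duplicating work when the intermediate min/max has other users.
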
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 607383ab8cde..67077a2eaa6b 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -148,6 +148,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addUsedIfAvailable<LiveIntervals>();
// Should preserve the same set that TwoAddressInstructions does.
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<SlotIndexes>();
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index dd881ec42d53..786b6b61cb23 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -72,7 +72,7 @@ INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
-/// Insert restore code for the callee-saved registers used in the function.
+/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
ArrayRef<CalleeSavedInfo> CSI,
LiveIntervals *LIS) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index e426e938b856..ff5587fbb0ca 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -1883,7 +1883,13 @@ void SIScheduleDAGMI::schedule()
LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
buildDAGWithRegPressure();
+ postprocessDAG();
+
LLVM_DEBUG(dump());
+ if (PrintDAGs)
+ dump();
+ if (ViewMISchedDAGs)
+ viewGraph();
topologicalSort();
findRootsAndBiasEdges(TopRoots, BotRoots);
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 8a66213931ff..6b93769949bc 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2329,13 +2329,13 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
continue;
if (const auto &MOI = MOA.getLoadInfo(MI))
- Changed |= expandLoad(MOI.getValue(), MI);
+ Changed |= expandLoad(MOI.value(), MI);
else if (const auto &MOI = MOA.getStoreInfo(MI))
- Changed |= expandStore(MOI.getValue(), MI);
+ Changed |= expandStore(MOI.value(), MI);
else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
- Changed |= expandAtomicFence(MOI.getValue(), MI);
+ Changed |= expandAtomicFence(MOI.value(), MI);
else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
- Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
+ Changed |= expandAtomicCmpxchgOrRmw(MOI.value(), MI);
}
}
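
This hunk (like the MVEGatherScatterLowering one further down) is the mechanical llvm::Optional spelling change: getValue() becomes the std::optional-style value(). A stand-in sketch using std::optional itself, which has the same accessor shape:

```cpp
#include <optional>

int widthOrZero(const std::optional<int> &MOI) {
  // Previously spelled MOI.getValue() on llvm::Optional; the emptiness guard
  // is unchanged, since calling value() on an empty optional is an error.
  return MOI ? MOI.value() : 0;
}
```
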
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 5215397d5936..66bc46aaefea 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -9,6 +9,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
@@ -20,10 +21,40 @@ using namespace llvm;
namespace {
class SIOptimizeExecMasking : public MachineFunctionPass {
+ MachineFunction *MF = nullptr;
+ const GCNSubtarget *ST = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+
+ Register isCopyFromExec(const MachineInstr &MI) const;
+ Register isCopyToExec(const MachineInstr &MI) const;
+ bool removeTerminatorBit(MachineInstr &MI) const;
+ MachineBasicBlock::reverse_iterator
+ fixTerminators(MachineBasicBlock &MBB) const;
+ MachineBasicBlock::reverse_iterator
+ findExecCopy(MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I,
+ unsigned CopyToExec) const;
+
+ bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
+ MCRegister Reg, bool UseLiveOuts = false,
+ bool IgnoreStart = false) const;
+ bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const;
+ MachineInstr *findInstrBackwards(MachineInstr &Origin,
+ std::function<bool(MachineInstr *)> Pred,
+ ArrayRef<MCRegister> NonModifiableRegs,
+ unsigned MaxInstructions = 20) const;
+ MachineInstr *findPossibleVCMPVCMPXOptimization(MachineInstr &SaveExec,
+ MCRegister Exec) const;
+ bool optimizeExecSequence() const;
+ bool optimizeVCmpxAndSaveexecSequence() const;
+ bool optimizeSingleVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
+ MachineInstr &VCmp,
+ MCRegister Exec) const;
+
public:
static char ID;
-public:
SIOptimizeExecMasking() : MachineFunctionPass(ID) {
initializeSIOptimizeExecMaskingPass(*PassRegistry::getPassRegistry());
}
@@ -53,7 +84,7 @@ char SIOptimizeExecMasking::ID = 0;
char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
/// If \p MI is a copy from exec, return the register copied to.
-static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
+Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::S_MOV_B64:
@@ -61,8 +92,7 @@ static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B32_term: {
const MachineOperand &Src = MI.getOperand(1);
- if (Src.isReg() &&
- Src.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC))
+ if (Src.isReg() && Src.getReg() == TRI->getExec())
return MI.getOperand(0).getReg();
}
}
@@ -71,14 +101,13 @@ static Register isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
}
/// If \p MI is a copy to exec, return the register copied from.
-static Register isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) {
+Register SIOptimizeExecMasking::isCopyToExec(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::S_MOV_B64:
case AMDGPU::S_MOV_B32: {
const MachineOperand &Dst = MI.getOperand(0);
- if (Dst.isReg() &&
- Dst.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC) &&
+ if (Dst.isReg() && Dst.getReg() == TRI->getExec() &&
MI.getOperand(1).isReg())
return MI.getOperand(1).getReg();
break;
@@ -173,64 +202,64 @@ static unsigned getSaveExecOp(unsigned Opc) {
// These are only terminators to get correct spill code placement during
// register allocation, so turn them back into normal instructions.
-static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
+bool SIOptimizeExecMasking::removeTerminatorBit(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::S_MOV_B32_term: {
bool RegSrc = MI.getOperand(1).isReg();
- MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
+ MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
return true;
}
case AMDGPU::S_MOV_B64_term: {
bool RegSrc = MI.getOperand(1).isReg();
- MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64));
+ MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64));
return true;
}
case AMDGPU::S_XOR_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
+ MI.setDesc(TII->get(AMDGPU::S_XOR_B64));
return true;
}
case AMDGPU::S_XOR_B32_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
+ MI.setDesc(TII->get(AMDGPU::S_XOR_B32));
return true;
}
case AMDGPU::S_OR_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_OR_B64));
+ MI.setDesc(TII->get(AMDGPU::S_OR_B64));
return true;
}
case AMDGPU::S_OR_B32_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_OR_B32));
+ MI.setDesc(TII->get(AMDGPU::S_OR_B32));
return true;
}
case AMDGPU::S_ANDN2_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
+ MI.setDesc(TII->get(AMDGPU::S_ANDN2_B64));
return true;
}
case AMDGPU::S_ANDN2_B32_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_ANDN2_B32));
+ MI.setDesc(TII->get(AMDGPU::S_ANDN2_B32));
return true;
}
case AMDGPU::S_AND_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_AND_B64));
+ MI.setDesc(TII->get(AMDGPU::S_AND_B64));
return true;
}
case AMDGPU::S_AND_B32_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
- MI.setDesc(TII.get(AMDGPU::S_AND_B32));
+ MI.setDesc(TII->get(AMDGPU::S_AND_B32));
return true;
}
default:
@@ -241,9 +270,8 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
// Turn all pseudoterminators in the block into their equivalent non-terminator
// instructions. Returns the reverse iterator to the first non-terminator
// instruction in the block.
-static MachineBasicBlock::reverse_iterator fixTerminators(
- const SIInstrInfo &TII,
- MachineBasicBlock &MBB) {
+MachineBasicBlock::reverse_iterator
+SIOptimizeExecMasking::fixTerminators(MachineBasicBlock &MBB) const {
MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
bool Seen = false;
@@ -252,7 +280,7 @@ static MachineBasicBlock::reverse_iterator fixTerminators(
if (!I->isTerminator())
return Seen ? FirstNonTerm : I;
- if (removeTerminatorBit(TII, *I)) {
+ if (removeTerminatorBit(*I)) {
if (!Seen) {
FirstNonTerm = I;
Seen = true;
@@ -263,17 +291,15 @@ static MachineBasicBlock::reverse_iterator fixTerminators(
return FirstNonTerm;
}
-static MachineBasicBlock::reverse_iterator findExecCopy(
- const SIInstrInfo &TII,
- const GCNSubtarget &ST,
- MachineBasicBlock &MBB,
- MachineBasicBlock::reverse_iterator I,
- unsigned CopyToExec) {
+MachineBasicBlock::reverse_iterator
+SIOptimizeExecMasking::findExecCopy(MachineBasicBlock &MBB,
+ MachineBasicBlock::reverse_iterator I,
+ unsigned CopyToExec) const {
const unsigned InstLimit = 25;
auto E = MBB.rend();
for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
- Register CopyFromExec = isCopyFromExec(*I, ST);
+ Register CopyFromExec = isCopyFromExec(*I);
if (CopyFromExec.isValid())
return I;
}
@@ -298,11 +324,9 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
// an arbitrary condition based on the current MachineInstr, for instance a
// target instruction. Breaks prematurely by returning nullptr if one of the
// registers given in NonModifiableRegs is modified by the current instruction.
-static MachineInstr *
-findInstrBackwards(MachineInstr &Origin,
- std::function<bool(MachineInstr *)> Pred,
- ArrayRef<MCRegister> NonModifiableRegs,
- const SIRegisterInfo *TRI, unsigned MaxInstructions = 20) {
+MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
+ MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
+ ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
E = Origin.getParent()->rend();
unsigned CurrentIteration = 0;
@@ -310,7 +334,7 @@ findInstrBackwards(MachineInstr &Origin,
for (++A; CurrentIteration < MaxInstructions && A != E; ++A) {
if (A->isDebugInstr())
continue;
-
+
if (Pred(&*A))
return &*A;
@@ -318,209 +342,64 @@ findInstrBackwards(MachineInstr &Origin,
if (A->modifiesRegister(Reg, TRI))
return nullptr;
}
-
+
++CurrentIteration;
}
return nullptr;
}
-
// Determine if a register Reg is not re-defined and still in use
// in the range (Stop..Start].
// It does so by calculating liveness backwards from the end of the BB until
// either Stop or the beginning of the BB is reached.
// After liveness is calculated, we can determine if Reg is still in use and not
// defined in between the instructions.
-static bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
- MCRegister Reg, const SIRegisterInfo *TRI,
- MachineRegisterInfo &MRI,
- bool useLiveOuts = false,
- bool ignoreStart = false) {
+bool SIOptimizeExecMasking::isRegisterInUseBetween(MachineInstr &Stop,
+ MachineInstr &Start,
+ MCRegister Reg,
+ bool UseLiveOuts,
+ bool IgnoreStart) const {
LivePhysRegs LR(*TRI);
- if (useLiveOuts)
+ if (UseLiveOuts)
LR.addLiveOuts(*Stop.getParent());
MachineBasicBlock::reverse_iterator A(Start);
MachineBasicBlock::reverse_iterator E(Stop);
- if (ignoreStart)
+ if (IgnoreStart)
++A;
for (; A != Stop.getParent()->rend() && A != Stop; ++A) {
LR.stepBackward(*A);
}
- return !LR.available(MRI, Reg);
+ return !LR.available(*MRI, Reg);
}
// Determine if a register Reg is not re-defined and still in use
// in the range (Stop..BB.end].
-static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg,
- const SIRegisterInfo *TRI,
- MachineRegisterInfo &MRI) {
- return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, TRI,
- MRI, true);
-}
-
-// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence
-// by looking at an instance of a s_and_saveexec instruction. Returns a pointer
-// to the v_cmp instruction if it is safe to replace the sequence (see the
-// conditions in the function body). This is after register allocation, so some
-// checks on operand dependencies need to be considered.
-static MachineInstr *findPossibleVCMPVCMPXOptimization(
- MachineInstr &SaveExec, MCRegister Exec, const SIRegisterInfo *TRI,
- const SIInstrInfo *TII, MachineRegisterInfo &MRI) {
-
- MachineInstr *VCmp = nullptr;
-
- Register SaveExecDest = SaveExec.getOperand(0).getReg();
- if (!TRI->isSGPRReg(MRI, SaveExecDest))
- return nullptr;
-
- MachineOperand *SaveExecSrc0 =
- TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0);
- if (!SaveExecSrc0->isReg())
- return nullptr;
-
- // Try to find the last v_cmp instruction that defs the saveexec input
- // operand without any write to Exec or the saveexec input operand inbetween.
- VCmp = findInstrBackwards(
- SaveExec,
- [&](MachineInstr *Check) {
- return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 &&
- Check->modifiesRegister(SaveExecSrc0->getReg(), TRI);
- },
- {Exec, SaveExecSrc0->getReg()}, TRI);
-
- if (!VCmp)
- return nullptr;
-
- MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst);
- assert(VCmpDest && "Should have an sdst operand!");
-
- // Check if any of the v_cmp source operands is written by the saveexec.
- MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0);
- if (Src0->isReg() && TRI->isSGPRReg(MRI, Src0->getReg()) &&
- SaveExec.modifiesRegister(Src0->getReg(), TRI))
- return nullptr;
-
- MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1);
- if (Src1->isReg() && TRI->isSGPRReg(MRI, Src1->getReg()) &&
- SaveExec.modifiesRegister(Src1->getReg(), TRI))
- return nullptr;
-
- // Don't do the transformation if the destination operand is included in
- // it's MBB Live-outs, meaning it's used in any of it's successors, leading
- // to incorrect code if the v_cmp and therefore the def of
- // the dest operand is removed.
- if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg()))
- return nullptr;
-
- // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the
- // s_and_saveexec, skip the optimization.
- if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), TRI, MRI,
- false, true) ||
- isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI))
- return nullptr;
-
- // Try to determine if there is a write to any of the VCmp
- // operands between the saveexec and the vcmp.
- // If yes, additional VGPR spilling might need to be inserted. In this case,
- // it's not worth replacing the instruction sequence.
- SmallVector<MCRegister, 2> NonDefRegs;
- if (Src0->isReg())
- NonDefRegs.push_back(Src0->getReg());
-
- if (Src1->isReg())
- NonDefRegs.push_back(Src1->getReg());
-
- if (!findInstrBackwards(
- SaveExec, [&](MachineInstr *Check) { return Check == VCmp; },
- NonDefRegs, TRI))
- return nullptr;
-
- return VCmp;
-}
-
-// Inserts the optimized s_mov_b32 / v_cmpx sequence based on the
-// operands extracted from a v_cmp ..., s_and_saveexec pattern.
-static bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
- MachineInstr &VCmp, MCRegister Exec,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- MachineRegisterInfo &MRI) {
- const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode());
-
- if (NewOpcode == -1)
- return false;
-
- MachineOperand *Src0 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src0);
- MachineOperand *Src1 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src1);
-
- Register MoveDest = SaveExecInstr.getOperand(0).getReg();
-
- MachineBasicBlock::instr_iterator InsertPosIt = SaveExecInstr.getIterator();
- if (!SaveExecInstr.uses().empty()) {
- bool isSGPR32 = TRI->getRegSizeInBits(MoveDest, MRI) == 32;
- unsigned MovOpcode = isSGPR32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- BuildMI(*SaveExecInstr.getParent(), InsertPosIt,
- SaveExecInstr.getDebugLoc(), TII->get(MovOpcode), MoveDest)
- .addReg(Exec);
- }
-
- // Omit dst as V_CMPX is implicitly writing to EXEC.
- // Add dummy src and clamp modifiers, if needed.
- auto Builder = BuildMI(*VCmp.getParent(), std::next(InsertPosIt),
- VCmp.getDebugLoc(), TII->get(NewOpcode));
-
- auto TryAddImmediateValueFromNamedOperand =
- [&](unsigned OperandName) -> void {
- if (auto *Mod = TII->getNamedOperand(VCmp, OperandName))
- Builder.addImm(Mod->getImm());
- };
-
- TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src0_modifiers);
- Builder.add(*Src0);
-
- TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src1_modifiers);
- Builder.add(*Src1);
-
- TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp);
-
- // The kill flags may no longer be correct.
- if (Src0->isReg())
- MRI.clearKillFlags(Src0->getReg());
- if (Src1->isReg())
- MRI.clearKillFlags(Src1->getReg());
-
- return true;
+bool SIOptimizeExecMasking::isRegisterInUseAfter(MachineInstr &Stop,
+ MCRegister Reg) const {
+ return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, true);
}
-bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const SIInstrInfo *TII = ST.getInstrInfo();
- MachineRegisterInfo *MRI = &MF.getRegInfo();
- MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-
- // Optimize sequences emitted for control flow lowering. They are originally
- // emitted as the separate operations because spill code may need to be
- // inserted for the saved copy of exec.
- //
- // x = copy exec
- // z = s_<op>_b64 x, y
- // exec = copy z
- // =>
- // x = s_<op>_saveexec_b64 y
- //
+// Optimize sequences emitted for control flow lowering. They are originally
+// emitted as separate operations because spill code may need to be
+// inserted for the saved copy of exec.
+//
+// x = copy exec
+// z = s_<op>_b64 x, y
+// exec = copy z
+// =>
+// x = s_<op>_saveexec_b64 y
+//
+bool SIOptimizeExecMasking::optimizeExecSequence() const {
+ MCRegister Exec = TRI->getExec();
bool Changed = false;
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
+ for (MachineBasicBlock &MBB : *MF) {
+ MachineBasicBlock::reverse_iterator I = fixTerminators(MBB);
MachineBasicBlock::reverse_iterator E = MBB.rend();
if (I == E)
continue;
@@ -532,7 +411,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
unsigned SearchCount = 0;
const unsigned SearchLimit = 5;
while (I != E && SearchCount++ < SearchLimit) {
- CopyToExec = isCopyToExec(*I, ST);
+ CopyToExec = isCopyToExec(*I);
if (CopyToExec)
break;
++I;
@@ -542,8 +421,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
continue;
// Scan backwards to find the def.
- auto CopyToExecInst = &*I;
- auto CopyFromExecInst = findExecCopy(*TII, ST, MBB, I, CopyToExec);
+ auto *CopyToExecInst = &*I;
+ auto CopyFromExecInst = findExecCopy(MBB, I, CopyToExec);
if (CopyFromExecInst == E) {
auto PrepareExecInst = std::next(I);
if (PrepareExecInst == E)
@@ -574,8 +453,9 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *SaveExecInst = nullptr;
SmallVector<MachineInstr *, 4> OtherUseInsts;
- for (MachineBasicBlock::iterator J
- = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
+ for (MachineBasicBlock::iterator
+ J = std::next(CopyFromExecInst->getIterator()),
+ JE = I->getIterator();
J != JE; ++J) {
if (SaveExecInst && J->readsRegister(Exec, TRI)) {
LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
@@ -655,58 +535,210 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
CopyFromExec)
- .addReg(OtherOp->getReg());
+ .addReg(OtherOp->getReg());
SaveExecInst->eraseFromParent();
CopyToExecInst->eraseFromParent();
for (MachineInstr *OtherInst : OtherUseInsts) {
- OtherInst->substituteRegister(CopyToExec, Exec,
- AMDGPU::NoSubRegister, *TRI);
+ OtherInst->substituteRegister(CopyToExec, Exec, AMDGPU::NoSubRegister,
+ *TRI);
}
Changed = true;
}
- // After all s_op_saveexec instructions are inserted,
- // replace (on GFX10.3 and later)
- // v_cmp_* SGPR, IMM, VGPR
- // s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR
- // with
- // s_mov_b32 EXEC_SGPR_DEST, exec_lo
- // v_cmpx_* IMM, VGPR
- // to reduce pipeline stalls.
- if (ST.hasGFX10_3Insts()) {
- DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
- const unsigned AndSaveExecOpcode =
- ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+ return Changed;
+}
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- // Record relevant v_cmp / s_and_saveexec instruction pairs for
- // replacement.
- if (MI.getOpcode() != AndSaveExecOpcode)
- continue;
+// Tries to find an opportunity to optimize a v_cmp ..., s_and_saveexec sequence
+// by looking at an instance of an s_and_saveexec instruction. Returns a pointer
+// to the v_cmp instruction if it is safe to replace the sequence (see the
+// conditions in the function body). This is after register allocation, so some
+// checks on operand dependencies need to be considered.
+MachineInstr *SIOptimizeExecMasking::findPossibleVCMPVCMPXOptimization(
+ MachineInstr &SaveExec, MCRegister Exec) const {
- if (MachineInstr *VCmp =
- findPossibleVCMPVCMPXOptimization(MI, Exec, TRI, TII, *MRI))
- SaveExecVCmpMapping[&MI] = VCmp;
- }
+ MachineInstr *VCmp = nullptr;
+
+ Register SaveExecDest = SaveExec.getOperand(0).getReg();
+ if (!TRI->isSGPRReg(*MRI, SaveExecDest))
+ return nullptr;
+
+ MachineOperand *SaveExecSrc0 =
+ TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0);
+ if (!SaveExecSrc0->isReg())
+ return nullptr;
+
+ // Try to find the last v_cmp instruction that defs the saveexec input
+ // operand without any write to Exec or the saveexec input operand in between.
+ VCmp = findInstrBackwards(
+ SaveExec,
+ [&](MachineInstr *Check) {
+ return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 &&
+ Check->modifiesRegister(SaveExecSrc0->getReg(), TRI);
+ },
+ {Exec, SaveExecSrc0->getReg()});
+
+ if (!VCmp)
+ return nullptr;
+
+ MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst);
+ assert(VCmpDest && "Should have an sdst operand!");
+
+ // Check if any of the v_cmp source operands is written by the saveexec.
+ MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0);
+ if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) &&
+ SaveExec.modifiesRegister(Src0->getReg(), TRI))
+ return nullptr;
+
+ MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1);
+ if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) &&
+ SaveExec.modifiesRegister(Src1->getReg(), TRI))
+ return nullptr;
+
+ // Don't do the transformation if the destination operand is included in
+ // its MBB Live-outs, meaning it's used in any of its successors, leading
+ // to incorrect code if the v_cmp and therefore the def of
+ // the dest operand is removed.
+ if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg()))
+ return nullptr;
+
+ // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the
+ // s_and_saveexec, skip the optimization.
+ if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), false,
+ true) ||
+ isRegisterInUseAfter(SaveExec, VCmpDest->getReg()))
+ return nullptr;
+
+ // Try to determine if there is a write to any of the VCmp
+ // operands between the saveexec and the vcmp.
+ // If yes, additional VGPR spilling might need to be inserted. In this case,
+ // it's not worth replacing the instruction sequence.
+ SmallVector<MCRegister, 2> NonDefRegs;
+ if (Src0->isReg())
+ NonDefRegs.push_back(Src0->getReg());
+
+ if (Src1->isReg())
+ NonDefRegs.push_back(Src1->getReg());
+
+ if (!findInstrBackwards(
+ SaveExec, [&](MachineInstr *Check) { return Check == VCmp; },
+ NonDefRegs))
+ return nullptr;
+
+ return VCmp;
+}
+
+// Inserts the optimized s_mov_b32 / v_cmpx sequence based on the
+// operands extracted from a v_cmp ..., s_and_saveexec pattern.
+bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence(
+ MachineInstr &SaveExecInstr, MachineInstr &VCmp, MCRegister Exec) const {
+ const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode());
+
+ if (NewOpcode == -1)
+ return false;
+
+ MachineOperand *Src0 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src1);
+
+ Register MoveDest = SaveExecInstr.getOperand(0).getReg();
+
+ MachineBasicBlock::instr_iterator InsertPosIt = SaveExecInstr.getIterator();
+ if (!SaveExecInstr.uses().empty()) {
+ bool IsSGPR32 = TRI->getRegSizeInBits(MoveDest, *MRI) == 32;
+ unsigned MovOpcode = IsSGPR32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ BuildMI(*SaveExecInstr.getParent(), InsertPosIt,
+ SaveExecInstr.getDebugLoc(), TII->get(MovOpcode), MoveDest)
+ .addReg(Exec);
+ }
+
+ // Omit dst as V_CMPX is implicitly writing to EXEC.
+ // Add dummy src and clamp modifiers, if needed.
+ auto Builder = BuildMI(*VCmp.getParent(), std::next(InsertPosIt),
+ VCmp.getDebugLoc(), TII->get(NewOpcode));
+
+ auto TryAddImmediateValueFromNamedOperand =
+ [&](unsigned OperandName) -> void {
+ if (auto *Mod = TII->getNamedOperand(VCmp, OperandName))
+ Builder.addImm(Mod->getImm());
+ };
+
+ TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src0_modifiers);
+ Builder.add(*Src0);
+
+ TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src1_modifiers);
+ Builder.add(*Src1);
+
+ TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp);
+
+ // The kill flags may no longer be correct.
+ if (Src0->isReg())
+ MRI->clearKillFlags(Src0->getReg());
+ if (Src1->isReg())
+ MRI->clearKillFlags(Src1->getReg());
+
+ return true;
+}
+
+// After all s_op_saveexec instructions are inserted,
+// replace (on GFX10.3 and later)
+// v_cmp_* SGPR, IMM, VGPR
+// s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR
+// with
+// s_mov_b32 EXEC_SGPR_DEST, exec_lo
+// v_cmpx_* IMM, VGPR
+// to reduce pipeline stalls.
+bool SIOptimizeExecMasking::optimizeVCmpxAndSaveexecSequence() const {
+ if (!ST->hasGFX10_3Insts())
+ return false;
+
+ bool Changed = false;
+
+ DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
+ MCRegister Exec = TRI->getExec();
+ const unsigned AndSaveExecOpcode =
+ ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ // Record relevant v_cmp / s_and_saveexec instruction pairs for
+ // replacement.
+ if (MI.getOpcode() != AndSaveExecOpcode)
+ continue;
+
+ if (MachineInstr *VCmp = findPossibleVCMPVCMPXOptimization(MI, Exec))
+ SaveExecVCmpMapping[&MI] = VCmp;
}
+ }
- for (const auto &Entry : SaveExecVCmpMapping) {
- MachineInstr *SaveExecInstr = Entry.getFirst();
- MachineInstr *VCmpInstr = Entry.getSecond();
+ for (const auto &Entry : SaveExecVCmpMapping) {
+ MachineInstr *SaveExecInstr = Entry.getFirst();
+ MachineInstr *VCmpInstr = Entry.getSecond();
- if (optimizeVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec, TII,
- TRI, *MRI)) {
- SaveExecInstr->eraseFromParent();
- VCmpInstr->eraseFromParent();
+ if (optimizeSingleVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec)) {
+ SaveExecInstr->eraseFromParent();
+ VCmpInstr->eraseFromParent();
- Changed = true;
- }
+ Changed = true;
}
}
return Changed;
}
+
+bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ this->MF = &MF;
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ TRI = ST->getRegisterInfo();
+ TII = ST->getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = optimizeExecSequence();
+ Changed |= optimizeVCmpxAndSaveexecSequence();
+
+ return Changed;
+}
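
Most of this file's diff is a mechanical refactor: the former static helpers become members so that ST, TRI, TII and MRI no longer have to be threaded through every call, and runOnMachineFunction shrinks to caching those pointers plus two top-level steps. The algorithmic workhorse, findInstrBackwards, is a bounded backward scan; a self-contained model of its shape, with assumed container and predicate types standing in for MachineInstr and register tracking:

```cpp
#include <functional>
#include <vector>

// Walk backwards from (not including) Origin for at most MaxSteps elements.
// Return the first element satisfying Pred; give up early if Clobbers fires
// first (standing in for "one of the non-modifiable registers was redefined").
template <typename T>
const T *findBackwards(const std::vector<T> &Seq, size_t Origin,
                       const std::function<bool(const T &)> &Pred,
                       const std::function<bool(const T &)> &Clobbers,
                       unsigned MaxSteps = 20) {
  for (unsigned N = 0; N < MaxSteps && Origin > 0; ++N) {
    const T &Cur = Seq[--Origin];
    if (Pred(Cur))
      return &Cur;    // found the instruction we were searching for
    if (Clobbers(Cur))
      return nullptr; // a register we rely on was redefined first
  }
  return nullptr;
}
```
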
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index e5e65a8dbbf1..57dbad468de8 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -159,6 +159,9 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
return false;
Register SelReg = Op1->getReg();
+ if (SelReg.isPhysical())
+ return false;
+
auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
return false;
@@ -264,13 +267,11 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
// Try to remove v_cndmask_b32.
if (SelLI) {
- bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill();
- if (!CanRemoveSel) {
- // Try to shrink the live interval and check for dead def instead.
- LIS->shrinkToUses(SelLI, nullptr);
- CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
- }
- if (CanRemoveSel) {
+ // Kill status must be checked before shrinking the live range.
+ bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+ LIS->shrinkToUses(SelLI);
+ bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+ if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ad1455ed20fd..b32d5bb04d5b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2933,6 +2933,10 @@ MCRegister SIRegisterInfo::getVCC() const {
return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
}
+MCRegister SIRegisterInfo::getExec() const {
+ return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+}
+
const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
// VGPR tuples have an alignment requirement on gfx90a variants.
return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 9bfbc253410b..6024158be181 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -344,6 +344,8 @@ public:
MCRegister getVCC() const;
+ MCRegister getExec() const;
+
const TargetRegisterClass *getRegClass(unsigned RCID) const;
// Find reaching register definition
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index e4ab72f1095b..2f334e211181 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -277,6 +277,18 @@ struct VOPC64DPPInfo {
uint16_t Opcode;
};
+struct VOPDComponentInfo {
+ uint16_t BaseVOP;
+ uint16_t VOPDOp;
+ bool CanBeVOPDX;
+};
+
+struct VOPDInfo {
+ uint16_t Opcode;
+ uint16_t OpX;
+ uint16_t OpY;
+};
+
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
@@ -293,6 +305,10 @@ struct VOPC64DPPInfo {
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
+#define GET_VOPDComponentTable_DECL
+#define GET_VOPDComponentTable_IMPL
+#define GET_VOPDPairs_DECL
+#define GET_VOPDPairs_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
@@ -398,6 +414,19 @@ bool getMAIIsGFX940XDL(unsigned Opc) {
return Info ? Info->is_gfx940_xdl : false;
}
+CanBeVOPD getCanBeVOPD(unsigned Opc) {
+ const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
+ if (Info)
+ return {Info->CanBeVOPDX, 1};
+ else
+ return {0, 0};
+}
+
+unsigned getVOPDOpcode(unsigned Opc) {
+ const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
+ return Info ? Info->VOPDOp : ~0u;
+}
+
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
return Info ? Info->Opcode3Addr : ~0u;
@@ -415,6 +444,11 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
+int getVOPDFull(unsigned OpX, unsigned OpY) {
+ const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
+ return Info ? Info->Opcode : -1;
+}
+
namespace IsaInfo {
AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
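
getCanBeVOPD, getVOPDOpcode and getVOPDFull expose the new TableGen-generated VOPD (dual-issue) tables. A hedged sketch of how a caller inside the AMDGPU target might combine them; the function and its pairing policy are illustrative, not from this patch, and the snippet assumes the target-internal AMDGPUBaseInfo.h declarations in the llvm::AMDGPU namespace:

```cpp
// Illustrative only: one opcode must be eligible for the X component and the
// other for the Y component, and the component pair must have a fused opcode.
bool canFormVOPD(unsigned OpcA, unsigned OpcB) {
  AMDGPU::CanBeVOPD A = AMDGPU::getCanBeVOPD(OpcA);
  AMDGPU::CanBeVOPD B = AMDGPU::getCanBeVOPD(OpcB);
  if (!(A.X && B.Y) && !(B.X && A.Y))
    return false;
  unsigned OpX = (A.X && B.Y) ? OpcA : OpcB;
  unsigned OpY = (OpX == OpcA) ? OpcB : OpcA;
  // getVOPDFull returns -1 when the component pair has no fused encoding.
  return AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(OpX),
                             AMDGPU::getVOPDOpcode(OpY)) != -1;
}
```
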
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index dffeec10a14a..51cf1678207c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -470,6 +470,14 @@ bool getMAIIsDGEMM(unsigned Opc);
LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);
+struct CanBeVOPD {
+ bool X;
+ bool Y;
+};
+
+LLVM_READONLY
+CanBeVOPD getCanBeVOPD(unsigned Opc);
+
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
@@ -483,6 +491,12 @@ LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
LLVM_READONLY
+unsigned getVOPDOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getVOPDFull(unsigned OpX, unsigned OpY);
+
+LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
LLVM_READONLY
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 1485a1e63129..b24857edb59a 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -495,9 +495,9 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins FI:$fi));
let InsDPP8 = (ins DstRCDPP:$old,
- Src0DPP:$src0,
- Src1DPP:$src1,
- dpp8:$dpp8, FI:$fi);
+ Src0DPP:$src0,
+ Src1DPP:$src1,
+ dpp8:$dpp8, FI:$fi);
let HasExt = 1;
let HasExtDPP = 1;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index eb6c54a45263..33d3441e94c2 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1108,7 +1108,6 @@ class VOPC64_DPP_Base<bits<10> op, string OpName, VOPProfile P>
// Inst{87-84} ignored by hw
let Inst{91-88} = bank_mask;
let Inst{95-92} = row_mask;
-
}
class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
@@ -1148,7 +1147,6 @@ class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P>
let Inst{40-32} = fi;
let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{95-72} = dpp8{23-0};
-
}
class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 8cd3d2fe2c47..187485ffa3ae 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1215,7 +1215,9 @@ class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VO
let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
- let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers));
+ let HasModifiers =
+ !if (Features.IsMAI, 0,
+ !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers));
}
class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Base<P, Features> {
@@ -1414,7 +1416,7 @@ multiclass VOP3_Realtriple_with_name_gfx11<bits<10> op, string opName,
VOP3_Real_dpp8_with_name_gfx11<op, opName, asmName>;
multiclass VOP3Only_Realtriple_with_name_gfx11<bits<10> op, string opName,
- string asmName> :
+ string asmName> :
VOP3_Realtriple_with_name_gfx11<op, opName, asmName, 1>;
multiclass VOP3be_Realtriple_gfx11<
diff --git a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
index 0390c01eecb1..cee2fc7d2bf0 100644
--- a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -49,6 +49,9 @@ public:
} // end anonymous namespace
void ARCAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ ARC_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
SmallString<128> Str;
raw_svector_ostream O(Str);
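
This is the first of many per-target hunks in this import wiring TableGen-generated predicate verification into emitInstruction: ENABLE_INSTR_PREDICATE_VERIFIER emits the helper body into <Target>GenInstrInfo.inc and GET_INSTRINFO_MC_HELPER_DECLS declares it. A toy model of the check itself, with std::bitset standing in for FeatureBitset:

```cpp
#include <bitset>
#include <cassert>

using FeatureBits = std::bitset<64>;

// Model: the features an opcode requires must be a subset of the subtarget's
// enabled features; emitting the instruction otherwise is a compiler bug.
void verifyInstructionPredicates(const FeatureBits &Required,
                                 const FeatureBits &Enabled) {
  assert((Required & ~Enabled).none() &&
         "instruction requires a subtarget feature that is not enabled");
  (void)Required;
  (void)Enabled; // no-ops when asserts are compiled out
}
```
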
diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index d4f74fa77fc4..36b00af2c0b4 100644
--- a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -26,6 +26,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "ARCGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
index ab06ce46d99f..5f83b48b36af 100644
--- a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
+++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
@@ -28,6 +28,7 @@ class Target;
// Defines symbolic names for the ARC instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "ARCGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 48559a89a30a..73970b9c74c5 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -378,13 +378,13 @@ def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true
def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2",
"Prefer 32-bit alignment for loops">;
-def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "1",
+def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "4",
"Model MVE instructions as a 1 beat per tick architecture">;
def FeatureMVEVectorCostFactor2 : SubtargetFeature<"mve2beat", "MVEVectorCostFactor", "2",
"Model MVE instructions as a 2 beats per tick architecture">;
-def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "4",
+def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "1",
"Model MVE instructions as a 4 beats per tick architecture">;
/// Some instructions update CPSR partially, which can add false dependency for
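
The swap of the "1" and "4" values above is the substantive fix: MVEVectorCostFactor is a cost multiplier, not a beat count. An MVE vector operation executes over four beats, so a one-beat-per-tick core spends four ticks per operation (factor 4) while a four-beat-per-tick core retires one per tick (factor 1). The relation, under that reading of the feature descriptions:

```cpp
// Cost factor = ticks per 4-beat MVE vector operation = 4 / beats-per-tick.
constexpr unsigned mveCostFactor(unsigned BeatsPerTick) {
  return 4 / BeatsPerTick;
}
static_assert(mveCostFactor(1) == 4, "mve1beat -> cost factor 4");
static_assert(mveCostFactor(2) == 2, "mve2beat -> cost factor 2");
static_assert(mveCostFactor(4) == 1, "mve4beat -> cost factor 1");
```
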
@@ -1450,6 +1450,13 @@ def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline,
HasMVEFloatOps,
FeatureFixCMSE_CVE_2021_35465]>;
+def : ProcessorModel<"cortex-m85", CortexM7Model, [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeaturePACBTI,
+ FeatureUseMISched,
+ HasMVEFloatOps]>;
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 4aa28bc5d28d..57cbd7a3b2b8 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1337,6 +1337,10 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // TODO FIXME: Enable feature predicate checks once all the tests pass.
+ // ARM_MC::verifyInstructionPredicates(MI->getOpcode(),
+ // getSubtargetInfo().getFeatureBits());
+
const DataLayout &DL = getDataLayout();
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 85e32c08c74c..e6be93e6480a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -450,6 +450,14 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
+ if (!HasMVEFP) {
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
}
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
@@ -13350,14 +13358,14 @@ static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
// to make better use of vaddva style instructions.
if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
- !isa<ConstantSDNode>(N0)) {
+ !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
}
// And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
// add(add(add(A, C), reduce(B)), reduce(D))
if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
- N1.getOpcode() == ISD::ADD) {
+ N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
unsigned N0RedOp = 0;
if (!IsVecReduce(N0.getOperand(N0RedOp))) {
N0RedOp = 1;
@@ -13424,7 +13432,7 @@ static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
};
SDValue X;
- if (N0.getOpcode() == ISD::ADD) {
+ if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
N0.getOperand(1).getOperand(0));
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3a9946ee810b..ba1d806c8d81 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2247,15 +2247,15 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
return canTailPredicateLoop(L, LI, SE, DL, LAI);
}
-bool ARMTTIImpl::emitGetActiveLaneMask() const {
+PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const {
if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
- return false;
+ return PredicationStyle::None;
// Intrinsic @llvm.get.active.lane.mask is supported.
// It is used in the MVETailPredication pass, which requires the number of
// elements processed by this vector loop to setup the tail-predicated
// loop.
- return true;
+ return PredicationStyle::Data;
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index d7a2bdb3db15..dcf82e703a7f 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -298,7 +298,7 @@ public:
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
- bool emitGetActiveLaneMask() const;
+ PredicationStyle emitGetActiveLaneMask() const;
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 3f1379f135d1..9f85d72cc810 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -133,6 +133,7 @@ static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
}
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "ARMGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index e0c992f4fae2..3066d9ba6783 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -139,6 +139,7 @@ bool isCDECoproc(size_t Coproc, const MCSubtargetInfo &STI);
// Defines symbolic names for the ARM instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "ARMGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 30785340ef12..296801094fbe 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -351,13 +351,13 @@ Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) {
if (!Op0 || !Op1)
return Optional<int64_t>{};
if (I->getOpcode() == Instruction::Add)
- return Optional<int64_t>{Op0.getValue() + Op1.getValue()};
+ return Optional<int64_t>{Op0.value() + Op1.value()};
if (I->getOpcode() == Instruction::Mul)
- return Optional<int64_t>{Op0.getValue() * Op1.getValue()};
+ return Optional<int64_t>{Op0.value() * Op1.value()};
if (I->getOpcode() == Instruction::Shl)
- return Optional<int64_t>{Op0.getValue() << Op1.getValue()};
+ return Optional<int64_t>{Op0.value() << Op1.value()};
if (I->getOpcode() == Instruction::Or)
- return Optional<int64_t>{Op0.getValue() | Op1.getValue()};
+ return Optional<int64_t>{Op0.value() | Op1.value()};
}
return Optional<int64_t>{};
}
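
Beyond the accessor rename, getIfConst's shape is the classic optional-propagating fold: evaluate both operands, and only combine them when both are known constants. A stand-alone model with std::optional (one operator shown; the real code handles Add, Mul, Shl and Or):

```cpp
#include <cstdint>
#include <optional>

// Fold an addition only when both operands are known constants; otherwise
// propagate "not a constant" as an empty optional.
std::optional<int64_t> foldAdd(std::optional<int64_t> Op0,
                               std::optional<int64_t> Op1) {
  if (!Op0 || !Op1)
    return std::nullopt;
  return Op0.value() + Op1.value(); // the value() spelling adopted above
}
```
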
diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index 0001e520b1fb..70fc90bf9eb5 100644
--- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -180,6 +180,10 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void AVRAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // FIXME: Enable feature predicate checks once all the tests pass.
+ // AVR_MC::verifyInstructionPredicates(MI->getOpcode(),
+ // getSubtargetInfo().getFeatureBits());
+
AVRMCInstLower MCInstLowering(OutContext, *this);
MCInst I;
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index cdfe4a21105d..ba370261e284 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -27,6 +27,7 @@
#include "llvm/MC/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AVRGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index aaf236d82016..e83d674f87cc 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -49,6 +49,7 @@ std::unique_ptr<MCObjectTargetWriter> createAVRELFObjectWriter(uint8_t OSABI);
#include "AVRGenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "AVRGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 349cdd92ae62..9aad9375d913 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -149,6 +149,13 @@ private:
// The base call is not an input of any other preserve_*
// intrinsics.
std::map<CallInst *, CallInfo> BaseAICalls;
+ // A map to hold <AnonRecord, TypeDef> relationships
+ std::map<DICompositeType *, DIDerivedType *> AnonRecords;
+
+ void CheckAnonRecordType(DIDerivedType *ParentTy, DIType *Ty);
+ void CheckCompositeType(DIDerivedType *ParentTy, DICompositeType *CTy);
+ void CheckDerivedType(DIDerivedType *ParentTy, DIDerivedType *DTy);
+ void ResetMetadata(struct CallInfo &CInfo);
bool doTransformation(Function &F);
@@ -221,10 +228,80 @@ bool BPFAbstractMemberAccess::run(Function &F) {
if (M->debug_compile_units().empty())
return false;
+ // For each argument/return/local_variable type, trace the type
+ // pattern like '[derived_type]* [composite_type]' to check
+ // and remember (anon record -> typedef) relations where the
+ // anon record is defined as
+ // typedef [const/volatile/restrict]* [anon record]
+ DISubprogram *SP = F.getSubprogram();
+ if (SP && SP->isDefinition()) {
+ for (DIType *Ty: SP->getType()->getTypeArray())
+ CheckAnonRecordType(nullptr, Ty);
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN))
+ CheckAnonRecordType(nullptr, DV->getType());
+ }
+ }
+
DL = &M->getDataLayout();
return doTransformation(F);
}
+void BPFAbstractMemberAccess::ResetMetadata(struct CallInfo &CInfo) {
+ if (auto Ty = dyn_cast<DICompositeType>(CInfo.Metadata)) {
+ if (AnonRecords.find(Ty) != AnonRecords.end()) {
+ if (AnonRecords[Ty] != nullptr)
+ CInfo.Metadata = AnonRecords[Ty];
+ }
+ }
+}
+
+void BPFAbstractMemberAccess::CheckCompositeType(DIDerivedType *ParentTy,
+ DICompositeType *CTy) {
+ if (!CTy->getName().empty() || !ParentTy ||
+ ParentTy->getTag() != dwarf::DW_TAG_typedef)
+ return;
+
+ if (AnonRecords.find(CTy) == AnonRecords.end()) {
+ AnonRecords[CTy] = ParentTy;
+ return;
+ }
+
+ // Two or more typedefs may point to the same anon record.
+ // If this is the case, set the typedef DIType to be nullptr
+ // to indicate the duplication case.
+ DIDerivedType *CurrTy = AnonRecords[CTy];
+ if (CurrTy == ParentTy)
+ return;
+ AnonRecords[CTy] = nullptr;
+}
+
+void BPFAbstractMemberAccess::CheckDerivedType(DIDerivedType *ParentTy,
+ DIDerivedType *DTy) {
+ DIType *BaseType = DTy->getBaseType();
+ if (!BaseType)
+ return;
+
+ unsigned Tag = DTy->getTag();
+ if (Tag == dwarf::DW_TAG_pointer_type)
+ CheckAnonRecordType(nullptr, BaseType);
+ else if (Tag == dwarf::DW_TAG_typedef)
+ CheckAnonRecordType(DTy, BaseType);
+ else
+ CheckAnonRecordType(ParentTy, BaseType);
+}
+
+void BPFAbstractMemberAccess::CheckAnonRecordType(DIDerivedType *ParentTy,
+ DIType *Ty) {
+ if (!Ty)
+ return;
+
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty))
+ return CheckCompositeType(ParentTy, CTy);
+ else if (auto *DTy = dyn_cast<DIDerivedType>(Ty))
+ return CheckDerivedType(ParentTy, DTy);
+}
+
static bool SkipDIDerivedTag(unsigned Tag, bool skipTypedef) {
if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
Tag != dwarf::DW_TAG_volatile_type &&
@@ -298,6 +375,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
if (!CInfo.Metadata)
report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic");
+ ResetMetadata(CInfo);
CInfo.AccessIndex = getConstant(Call->getArgOperand(1));
CInfo.Base = Call->getArgOperand(0);
return true;
@@ -307,6 +385,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index);
if (!CInfo.Metadata)
report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic");
+ ResetMetadata(CInfo);
CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
CInfo.Base = Call->getArgOperand(0);
CInfo.RecordAlignment = DL->getABITypeAlign(getBaseElementType(Call));
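
The motivation for the AnonRecords bookkeeping above: an anonymous struct or union has no name for an access-index relocation to key on, so when exactly one typedef names it, the typedef's DIType stands in; with two or more typedefs the entry is poisoned to nullptr and left alone. The C-level patterns being classified, as illustrative examples (type names made up):

```cpp
// Case handled: an anonymous record reachable only through one typedef, so
// accesses can be re-keyed to 'task_ids_t'.
typedef struct {
  int pid;
  int tgid;
} task_ids_t;

// Case deliberately skipped: two typedefs naming the same anonymous record
// would be ambiguous, which is what the nullptr map entry records.
typedef struct { int v; } a_t, b_t;
```
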
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index d6145f53c170..c8849bd50464 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -138,6 +138,9 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ BPF_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
MCInst TmpInst;
if (!BTF || !BTF->InstLower(MI, TmpInst)) {
diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h
index 4540054aaf34..89852be4a8c8 100644
--- a/llvm/lib/Target/BPF/BTF.h
+++ b/llvm/lib/Target/BPF/BTF.h
@@ -48,6 +48,8 @@
#ifndef LLVM_LIB_TARGET_BPF_BTF_H
#define LLVM_LIB_TARGET_BPF_BTF_H
+#include <cstdint>
+
namespace llvm {
namespace BTF {
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index a98d001097bc..cb321906db03 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -31,14 +31,13 @@ using namespace llvm;
namespace {
class BPFMCCodeEmitter : public MCCodeEmitter {
- const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
bool IsLittleEndian;
public:
- BPFMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ BPFMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri,
bool IsLittleEndian)
- : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {}
+ : MRI(mri), IsLittleEndian(IsLittleEndian) { }
BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
void operator=(const BPFMCCodeEmitter &) = delete;
~BPFMCCodeEmitter() override = default;
@@ -62,12 +61,6 @@ public:
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
@@ -117,9 +110,6 @@ static uint8_t SwapBits(uint8_t Val)
void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
unsigned Opcode = MI.getOpcode();
support::endian::Writer OSE(OS,
IsLittleEndian ? support::little : support::big);
@@ -174,5 +164,4 @@ uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
return Encoding;
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "BPFGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 5a1e251cd29c..77db5f99225e 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/Host.h"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "BPFGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index fc190504581c..ea30e714a5b7 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -54,6 +54,7 @@ std::unique_ptr<MCObjectTargetWriter> createBPFELFObjectWriter(uint8_t OSABI);
// Defines symbolic names for the BPF instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "BPFGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
index 0236b22ad379..ea5b4555757e 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -141,6 +141,9 @@ void CSKYAsmPrinter::emitEndOfAsmFile(Module &M) {
}
void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ CSKY_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 300ecceae906..8d3835b22bb0 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -153,7 +153,7 @@ def CSKYSymbol : AsmOperandClass {
let ParserMethod = "parseCSKYSymbol";
}
-def br_symbol : Operand<iPTR> {
+def br_symbol : Operand<OtherVT> {
let EncoderMethod =
"getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm16_scale2>";
let ParserMatchClass = CSKYSymbol;
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
index 3be1ca8b7998..2d7fb85e89fa 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
@@ -24,7 +24,7 @@ def CSKY_NIR : SDNode<"CSKYISD::NIR", SDTNone,
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def br_symbol_16bit : Operand<iPTR> {
+def br_symbol_16bit : Operand<OtherVT> {
let EncoderMethod =
"getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm10_scale2>";
let ParserMatchClass = CSKYSymbol;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
index 1a69dc8acde0..64f01cd1c9fa 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
@@ -26,6 +26,7 @@
#include "llvm/MC/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "CSKYGenInstrInfo.inc"
#define GET_REGINFO_MC_DESC
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
index 4b8c45e95b74..1137b4d6e9b1 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
@@ -41,6 +41,7 @@ MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx);
#include "CSKYGenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "CSKYGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 4d6e1a9d3166..709279889653 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -116,7 +116,7 @@ def ThreadId :dxil_op< "ThreadId", 93, ThreadIdClass, ComputeID, "reads the thr
dxil_param<1, "i32", "opcode", "DXIL opcode">,
dxil_param<2, "i32", "component", "component to read (x,y,z)">
]>,
- dxil_map_intrinsic<int_dxil_thread_id>;
+ dxil_map_intrinsic<int_dx_thread_id>;
def GroupId :dxil_op< "GroupId", 94, GroupIdClass, ComputeID, "reads the group ID (SV_GroupID)", "i32;", "rn",
[
@@ -124,7 +124,7 @@ def GroupId :dxil_op< "GroupId", 94, GroupIdClass, ComputeID, "reads the group
dxil_param<1, "i32", "opcode", "DXIL opcode">,
dxil_param<2, "i32", "component", "component to read">
]>,
- dxil_map_intrinsic<int_dxil_group_id>;
+ dxil_map_intrinsic<int_dx_group_id>;
def ThreadIdInGroup :dxil_op< "ThreadIdInGroup", 95, ThreadIdInGroupClass, ComputeID,
"reads the thread ID within the group (SV_GroupThreadID)", "i32;", "rn",
@@ -133,7 +133,7 @@ def ThreadIdInGroup :dxil_op< "ThreadIdInGroup", 95, ThreadIdInGroupClass, Comp
dxil_param<1, "i32", "opcode", "DXIL opcode">,
dxil_param<2, "i32", "component", "component to read (x,y,z)">
]>,
- dxil_map_intrinsic<int_dxil_thread_id_in_group>;
+ dxil_map_intrinsic<int_dx_thread_id_in_group>;
def FlattenedThreadIdInGroup :dxil_op< "FlattenedThreadIdInGroup", 96, FlattenedThreadIdInGroupClass, ComputeID,
"provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i32;", "rn",
@@ -141,4 +141,4 @@ def FlattenedThreadIdInGroup :dxil_op< "FlattenedThreadIdInGroup", 96, Flattene
dxil_param<0, "i32", "", "result">,
dxil_param<1, "i32", "opcode", "DXIL opcode">
]>,
- dxil_map_intrinsic<int_dxil_flattened_thread_id_in_group>;
+ dxil_map_intrinsic<int_dx_flattened_thread_id_in_group>;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 494a71e51a89..3e09270a66d0 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -595,6 +595,10 @@ unsigned DXILBitcodeWriter::getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
return bitc::RMW_FADD;
case AtomicRMWInst::FSub:
return bitc::RMW_FSUB;
+ case AtomicRMWInst::FMax:
+ return bitc::RMW_FMAX;
+ case AtomicRMWInst::FMin:
+ return bitc::RMW_FMIN;
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 48d339234e9e..1064296b0991 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -743,6 +743,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
/// Print out a single Hexagon MI to the current output stream.
void HexagonAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ Hexagon_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
MCInst MCB;
MCB.setOpcode(Hexagon::BUNDLE);
MCB.addOperand(MCOperand::createImm(0));
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 0b4a95bc9ce5..01501109f3b1 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1024,7 +1024,7 @@ void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
for (auto &B : MF) {
auto At = findCFILocation(B);
if (At)
- insertCFIInstructionsAt(B, At.getValue());
+ insertCFIInstructionsAt(B, At.value());
}
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index ed2856eb1fe9..9c235776c160 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -376,11 +376,9 @@ void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
State.Bundle = &MI;
State.Index = 0;
size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
- FeatureBitset Features = computeAvailableFeatures(STI.getFeatureBits());
for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
MCInst &HMI = const_cast<MCInst &>(*I.getInst());
- verifyInstructionPredicates(HMI, Features);
EncodeSingleInstruction(HMI, OS, Fixups, STI, parseBits(Last, HMB, HMI));
State.Extended = HexagonMCInstrInfo::isImmext(HMI);
@@ -793,5 +791,4 @@ MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
return new HexagonMCCodeEmitter(MII, MCT);
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "HexagonGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 9e86dc8e4989..151964bf818b 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -81,11 +81,6 @@ private:
// Return parse bits for instruction `MCI' inside bundle `MCB'
uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const;
-
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index d068baf05998..f2d1173cd503 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -46,6 +46,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "HexagonGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index d717e710f3c0..3932077c08f1 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -110,6 +110,7 @@ unsigned HexagonConvertUnits(unsigned ItinUnits, unsigned *Lanes);
//
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_SCHED_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "HexagonGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index d715ba901a2b..33e7068622f1 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -705,14 +705,14 @@ LanaiAsmParser::parseRegister(bool RestoreOnFailure) {
RegNum = MatchRegisterName(Lexer.getTok().getIdentifier());
if (RegNum == 0) {
if (PercentTok && RestoreOnFailure)
- Lexer.UnLex(PercentTok.getValue());
+ Lexer.UnLex(PercentTok.value());
return nullptr;
}
Parser.Lex(); // Eat identifier token
return LanaiOperand::createReg(RegNum, Start, End);
}
if (PercentTok && RestoreOnFailure)
- Lexer.UnLex(PercentTok.getValue());
+ Lexer.UnLex(PercentTok.value());
return nullptr;
}
diff --git a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index c0b7fd3fdd5d..d142fd3a414f 100644
--- a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -195,6 +195,9 @@ void LanaiAsmPrinter::customEmitInstruction(const MachineInstr *MI) {
}
void LanaiAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ Lanai_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index eb6bf8d3836c..c43450869832 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -28,6 +28,7 @@
#include <string>
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "LanaiGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
index e8da1bc88142..93fe1a4609d8 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
@@ -43,6 +43,7 @@ std::unique_ptr<MCObjectTargetWriter> createLanaiELFObjectWriter(uint8_t OSABI);
// Defines symbolic names for the Lanai instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "LanaiGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
index dd61bb2df077..1467d1757ff0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
@@ -27,6 +27,9 @@ using namespace llvm;
#include "LoongArchGenMCPseudoLowering.inc"
void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ LoongArch_MC::verifyInstructionPredicates(
+ MI->getOpcode(), getSubtargetInfo().getFeatureBits());
+
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
index 7e5aa49f227c..b51c19188051 100644
--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
@@ -39,6 +39,10 @@ public:
// tblgen'erated function.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
+ // Wrapper needed for tblgenned pseudo lowering.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
+ return lowerLoongArchMachineOperandToMCOperand(MO, MCOp, *this);
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 5b117d40e0a9..20448492a558 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -11,6 +11,22 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// LoongArch specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_LoongArchMOVGR2FR_W_LA64
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
+def SDT_LoongArchMOVFR2GR_S_LA64
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
+def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+
+def loongarch_movgr2fr_w_la64
+ : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
+def loongarch_movfr2gr_s_la64
+ : SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
+def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
+
+//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -149,6 +165,7 @@ def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>;
def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>;
def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>;
def : PatFPSetcc<SETUO, FCMP_CUN_S, FPR32>;
+def : PatFPSetcc<SETLT, FCMP_CLT_S, FPR32>;
// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions.
@@ -174,4 +191,39 @@ def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>;
def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>;
def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_S, FPR32>;
+/// Loads
+
+defm : LdPat<load, FLD_S, f32>;
+
+/// Stores
+
+defm : StPat<store, FST_S, FPR32, f32>;
+
+/// Floating point constants
+
+def : Pat<(f32 fpimm0), (MOVGR2FR_W R0)>;
+def : Pat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W R0))>;
+def : Pat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1)))>;
+
+// FP Conversion
+def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>;
} // Predicates = [HasBasicF]
+
+let Predicates = [HasBasicF, IsLA64] in {
+// GPR -> FPR
+def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>;
+// FPR -> GPR
+def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src),
+ (MOVFR2GR_S FPR32:$src)>;
+// int -> f32
+def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>;
+} // Predicates = [HasBasicF, IsLA64]
+
+let Predicates = [HasBasicF, IsLA32] in {
+// GPR -> FPR
+def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>;
+// FPR -> GPR
+def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>;
+// int -> f32
+def : Pat<(f32 (sint_to_fp (i32 GPR:$src))), (FFINT_S_W (MOVGR2FR_W GPR:$src))>;
+} // Predicates = [HasBasicF, IsLA32]
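PLACEHOLDER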
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 07fa61f4c361..bb50cec9f4c0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -131,6 +131,11 @@ def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>;
def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>;
} // Predicates = [HasBasicD, IsLA64]
+// Instructions only available on LA32
+let Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 in {
+def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR64, GPR>;
+} // Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
@@ -164,6 +169,7 @@ def : PatFPSetcc<SETULT, FCMP_CULT_D, FPR64>;
def : PatFPSetcc<SETULE, FCMP_CULE_D, FPR64>;
def : PatFPSetcc<SETUNE, FCMP_CUNE_D, FPR64>;
def : PatFPSetcc<SETUO, FCMP_CUN_D, FPR64>;
+def : PatFPSetcc<SETLT, FCMP_CLT_D, FPR64>;
// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions.
@@ -185,4 +191,52 @@ def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>;
def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>;
def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_D, FPR64>;
+/// Loads
+
+defm : LdPat<load, FLD_D, f64>;
+
+/// Stores
+
+defm : StPat<store, FST_D, FPR64, f64>;
+
+/// FP conversion operations
+
+def : Pat<(loongarch_ftint FPR64:$src), (FTINTRZ_W_D FPR64:$src)>;
+def : Pat<(f64 (loongarch_ftint FPR64:$src)), (FTINTRZ_L_D FPR64:$src)>;
+def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_L_S FPR32:$src)>;
+
+// f64 -> f32
+def : Pat<(f32 (fpround FPR64:$src)), (FCVT_S_D FPR64:$src)>;
+// f32 -> f64
+def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>;
} // Predicates = [HasBasicD]
+
+/// Floating point constants
+
+let Predicates = [HasBasicD, IsLA64] in {
+def : Pat<(f64 fpimm0), (MOVGR2FR_D R0)>;
+def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FR_D R0))>;
+def : Pat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D R0, 1)))>;
+
+// Convert int to FP
+def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
+ (FFINT_D_W (MOVGR2FR_W GPR:$src))>;
+def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>;
+
+def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))),
+ (FFINT_D_W (MOVGR2FR_W GPR:$src))>;
+
+def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>;
+
+// Convert FP to int
+def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>;
+} // Predicates = [HasBasicD, IsLA64]
+
+let Predicates = [HasBasicD, IsLA32] in {
+def : Pat<(f64 fpimm0), (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0)>;
+def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0))>;
+def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>;
+
+// Convert int to FP
+def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>;
+} // Predicates = [HasBasicD, IsLA32]
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 7182d55ca3cf..0d9ec9e2eaaa 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -11,7 +11,9 @@
//===----------------------------------------------------------------------===//
#include "LoongArchFrameLowering.h"
+#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchSubtarget.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -44,12 +46,178 @@ bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const {
return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF);
}
+void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register DestReg,
+ Register SrcReg, int64_t Val,
+ MachineInstr::MIFlag Flag) const {
+ const LoongArchInstrInfo *TII = STI.getInstrInfo();
+ bool IsLA64 = STI.is64Bit();
+
+ if (DestReg == SrcReg && Val == 0)
+ return;
+
+ if (isInt<12>(Val)) {
+ // addi.w/d $DstReg, $SrcReg, Val
+ BuildMI(MBB, MBBI, DL,
+ TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg)
+ .addReg(SrcReg)
+ .addImm(Val)
+ .setMIFlag(Flag);
+ return;
+ }
+
+ report_fatal_error("adjustReg cannot yet handle adjustments >12 bits");
+}
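
As a usage illustration of adjustReg (values hypothetical, not from the patch), the prologue's stack allocation reduces to a single addi when the adjustment fits in a signed 12-bit immediate:

    // Illustrative only: allocate 32 bytes on LA64 ($r3 is $sp).
    adjustReg(MBB, MBBI, DL, /*DestReg=*/LoongArch::R3, /*SrcReg=*/LoongArch::R3,
              /*Val=*/-32, MachineInstr::FrameSetup);
    // emits: addi.d $r3, $r3, -32
    // Adjustments outside [-2048, 2047] currently hit the report_fatal_error.
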
+
+// Determine the size of the frame and maximum call frame size.
+void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t FrameSize = MFI.getStackSize();
+
+ // Make sure the frame is aligned.
+ FrameSize = alignTo(FrameSize, getStackAlign());
+
+ // Update frame info.
+ MFI.setStackSize(FrameSize);
+}
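
For example (hypothetical numbers), with a 16-byte stack alignment a raw frame size of 40 bytes is padded up before being stored back:

    // alignTo(40, getStackAlign()) == 48 when the alignment is 16 bytes.
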
+
void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- // TODO: Implement this when we have function calls
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
+ const LoongArchInstrInfo *TII = STI.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+
+ Register SPReg = LoongArch::R3;
+ Register FPReg = LoongArch::R22;
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+
+ // Determine the correct frame layout
+ determineFrameLayout(MF);
+
+ // First, compute final stack size.
+ uint64_t StackSize = MFI.getStackSize();
+
+ // Early exit if there is no need to allocate space in the stack.
+ if (StackSize == 0 && !MFI.adjustsStack())
+ return;
+
+ // Adjust stack.
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
+ // Emit ".cfi_def_cfa_offset StackSize".
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ const auto &CSI = MFI.getCalleeSavedInfo();
+
+ // The frame pointer is callee-saved, and code has been generated for us to
+ // save it to the stack. We need to skip over the storing of callee-saved
+ // registers as the frame pointer must be modified after it has been saved
+ // to the stack, not before.
+ std::advance(MBBI, CSI.size());
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ for (const auto &Entry : CSI) {
+ int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Generate new FP.
+ if (hasFP(MF)) {
+ adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup);
+
+ // Emit ".cfi_def_cfa $fp, 0"
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, RI->getDwarfRegNum(FPReg, true), 0));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- // TODO: Implement this when we have function calls
+ const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ Register SPReg = LoongArch::R3;
+
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ const auto &CSI = MFI.getCalleeSavedInfo();
+ // Skip to before the restores of callee-saved registers.
+ auto LastFrameDestroy = MBBI;
+ if (!CSI.empty())
+ LastFrameDestroy = std::prev(MBBI, CSI.size());
+
+ // Get the number of bytes from FrameInfo.
+ uint64_t StackSize = MFI.getStackSize();
+
+ // Restore the stack pointer.
+ if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
+ assert(hasFP(MF) && "frame pointer should not have been eliminated");
+ adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize,
+ MachineInstr::FrameDestroy);
+ }
+
+ // Deallocate stack
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
+}
+
+void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+  // If the function uses a frame pointer, unconditionally spill RA and FP.
+ if (hasFP(MF)) {
+ SavedRegs.set(LoongArch::R1);
+ SavedRegs.set(LoongArch::R22);
+ }
+  // Mark BP as used if the function has a dedicated base pointer.
+ if (hasBP(MF))
+ SavedRegs.set(LoongArchABI::getBPReg());
+}
+
+StackOffset LoongArchFrameLowering::getFrameIndexReference(
+ const MachineFunction &MF, int FI, Register &FrameReg) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // Callee-saved registers should be referenced relative to the stack
+ // pointer (positive offset), otherwise use the frame pointer (negative
+ // offset).
+ const auto &CSI = MFI.getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+ StackOffset Offset =
+ StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
+ MFI.getOffsetAdjustment());
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ FrameReg = RI->getFrameRegister(MF);
+ if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) {
+ FrameReg = LoongArch::R3;
+ Offset += StackOffset::getFixed(MFI.getStackSize());
+ }
+
+ return Offset;
}
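
A worked example of the arithmetic above (numbers illustrative): assume StackSize = 16, no offset adjustment, and a callee-saved slot at object offset -8 whose frame index lies within [MinCSFI, MaxCSFI]:

    // Offset   = StackOffset::getFixed(-8 - 0 + 0);  // raw object offset
    // FrameReg = LoongArch::R3;                      // callee-save slots use $sp
    // Offset  += StackOffset::getFixed(16);          // rebase on the allocated stack
    // => the slot is addressed as $sp + 8.
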
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index 25c53efc10f1..014b666de711 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -31,8 +31,26 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override {
+ return MBB.erase(MI);
+ }
+
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
+
bool hasFP(const MachineFunction &MF) const override;
bool hasBP(const MachineFunction &MF) const;
+
+private:
+ void determineFrameLayout(MachineFunction &MF) const;
+ void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register DestReg, Register SrcReg,
+ int64_t Val, MachineInstr::MIFlag Flag) const;
};
} // namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index cc9ea0255d98..bb40ff817574 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -33,13 +33,14 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
MVT GRLenVT = Subtarget->getGRLenVT();
SDLoc DL(Node);
+ MVT VT = Node->getSimpleValueType(0);
switch (Opcode) {
default:
break;
case ISD::Constant: {
int64_t Imm = cast<ConstantSDNode>(Node)->getSExtValue();
- if (Imm == 0 && Node->getSimpleValueType(0) == GRLenVT) {
+ if (Imm == 0 && VT == GRLenVT) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
LoongArch::R0, GRLenVT);
ReplaceNode(Node, New.getNode());
@@ -60,6 +61,15 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Result);
return;
}
+ case ISD::FrameIndex: {
+ SDValue Imm = CurDAG->getTargetConstant(0, DL, GRLenVT);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned ADDIOp =
+ Subtarget->is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+ ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm));
+ return;
+ }
// TODO: Add selection nodes needed later.
}
@@ -67,6 +77,17 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
SelectCode(Node);
}
+bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
+  // If this is a FrameIndex, select it directly; otherwise just let it get
+  // selected to a register independently.
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
+ Base =
+ CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getGRLenVT());
+ else
+ Base = Addr;
+ return true;
+}
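
Together with the ISD::FrameIndex case in Select above, a rough sketch of what this produces (virtual-register names illustrative): a FrameIndex used as a memory base is folded directly into the access, while a standalone FrameIndex materializes through ADDI with a zero immediate:

    // Standalone address:  %p = ADDI_D TargetFrameIndex:%stack.0, 0
    // Memory access:       %v = LD_D   TargetFrameIndex:%stack.0, 0
    // Frame-index elimination later rewrites both to $sp-relative offsets.
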
+
bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
// Shift instructions on LoongArch only read the lower 5 or 6 bits of the
@@ -125,6 +146,39 @@ bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
return true;
}
+bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
+ if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+ Val = N.getOperand(0);
+ return true;
+ }
+ MVT VT = N.getSimpleValueType();
+ if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
+ Val = N;
+ return true;
+ }
+
+ return false;
+}
+
+bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
+ if (N.getOpcode() == ISD::AND) {
+ auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
+ Val = N.getOperand(0);
+ return true;
+ }
+ }
+ MVT VT = N.getSimpleValueType();
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
+ if (CurDAG->MaskedValueIsZero(N, Mask)) {
+ Val = N;
+ return true;
+ }
+
+ return false;
+}
+
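A few hypothetical DAG shapes these two helpers accept:

    // selectSExti32: (sext_inreg x, i32)          -> Val = x
    //                x with >= 33 known sign bits -> Val = x
    // selectZExti32: (and x, 0xffffffff)          -> Val = x
    //                x whose top 32 bits are zero -> Val = x
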
// This pass converts a legalized DAG into a LoongArch-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index f477129d933c..7ad329a64424 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -38,6 +38,8 @@ public:
void Select(SDNode *Node) override;
+ bool SelectBaseAddr(SDValue Addr, SDValue &Base);
+
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) {
return selectShiftMask(N, Subtarget->getGRLen(), ShAmt);
@@ -46,6 +48,9 @@ public:
return selectShiftMask(N, 32, ShAmt);
}
+ bool selectSExti32(SDValue N, SDValue &Val);
+ bool selectZExti32(SDValue N, SDValue &Val);
+
// Include the pieces autogenerated from the target description.
#include "LoongArchGenDAGISel.inc"
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d5a469216859..4acf90bd9788 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -17,14 +17,21 @@
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
#define DEBUG_TYPE "loongarch-isel-lowering"
+static cl::opt<bool> ZeroDivCheck(
+ "loongarch-check-zero-division", cl::Hidden,
+ cl::desc("Trap on integer division by zero."),
+ cl::init(false));
+
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
const LoongArchSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -37,15 +44,25 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasBasicD())
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
+ MVT::i1, Promote);
+
// TODO: add necessary setOperationAction calls later.
setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
+
+ setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
}
static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
@@ -58,10 +75,19 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasBasicD()) {
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
}
+ setOperationAction(ISD::BR_CC, GRLenVT, Expand);
setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
+ if (!Subtarget.is64Bit())
+ setLibcallName(RTLIB::MUL_I128, nullptr);
+
+ setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
@@ -70,11 +96,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
+ setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
+
// Function alignments.
const Align FunctionAlignment(4);
setMinFunctionAlignment(FunctionAlignment);
setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::SRL);
}
@@ -83,6 +112,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
switch (Op.getOpcode()) {
default:
report_fatal_error("unimplemented operand");
+ case ISD::GlobalAddress:
+ return lowerGlobalAddress(Op, DAG);
case ISD::SHL_PARTS:
return lowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:
@@ -96,7 +127,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
+ case ISD::ConstantPool:
+ return lowerConstantPool(Op, DAG);
+ case ISD::FP_TO_SINT:
+ return lowerFP_TO_SINT(Op, DAG);
+ case ISD::BITCAST:
+ return lowerBITCAST(Op, DAG);
+ case ISD::FP_TO_UINT:
+ return SDValue();
+ case ISD::UINT_TO_FP:
+ return lowerUINT_TO_FP(Op, DAG);
+ }
+}
+
+SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ auto &TLI = DAG.getTargetLoweringInfo();
+ SDValue Tmp1, Tmp2;
+ SDValue Op1 = Op.getOperand(0);
+ if (Op1->getOpcode() == ISD::AssertZext ||
+ Op1->getOpcode() == ISD::AssertSext)
+ return Op;
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0));
+ SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc);
+ SDNode *N = Res.getNode();
+ TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG);
+ return Tmp1;
+}
+
+SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+
+ if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
+ Subtarget.is64Bit() && Subtarget.hasBasicF()) {
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
+ }
+ return Op;
+}
+
+SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
+ !Subtarget.hasBasicD()) {
+ SDValue Dst =
+ DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
+ }
+
+ EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
+ SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
+}
+
+SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT Ty = Op.getValueType();
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+
+ // FIXME: Only support PC-relative addressing to access the symbol.
+ // Target flags will be added later.
+ if (!isPositionIndependent()) {
+ SDValue ConstantN = DAG.getTargetConstantPool(
+ N->getConstVal(), Ty, N->getAlign(), N->getOffset());
+ SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, ConstantN),
+ 0);
+ SDValue Addr(DAG.getMachineNode(Subtarget.is64Bit() ? LoongArch::ADDI_D
+ : LoongArch::ADDI_W,
+ DL, Ty, AddrHi, ConstantN),
+ 0);
+ return Addr;
}
+ report_fatal_error("Unable to lower ConstantPool");
+}
+
+SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+
+ // FIXME: Only support PC-relative addressing to access the symbol.
+ // TODO: Add target flags.
+ if (!isPositionIndependent()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty);
+ SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0);
+ SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0);
+ return Addr;
+ }
+  report_fatal_error("Unable to lower GlobalAddress");
}
SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
@@ -238,6 +367,36 @@ void LoongArchTargetLowering::ReplaceNodeResults(
break;
}
break;
+ case ISD::FP_TO_SINT: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ SDValue Src = N->getOperand(0);
+ EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
+ SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src);
+ Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst));
+ break;
+ }
+ case ISD::BITCAST: {
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
+ Subtarget.hasBasicF()) {
+ SDValue Dst =
+ DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
+ }
+ break;
+ }
+ case ISD::FP_TO_UINT: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ auto &TLI = DAG.getTargetLoweringInfo();
+ SDValue Tmp1, Tmp2;
+ TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
+ break;
+ }
}
}
@@ -345,6 +504,224 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ EVT ValTy = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *CN0, *CN1;
+ SDLoc DL(N);
+ unsigned ValBits = ValTy.getSizeInBits();
+ unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
+ unsigned Shamt;
+ bool SwapAndRetried = false;
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (ValBits != 32 && ValBits != 64)
+ return SDValue();
+
+Retry:
+ // 1st pattern to match BSTRINS:
+ // R = or (and X, mask0), (and (shl Y, lsb), mask1)
+ // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
+ // =>
+ // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
+ if (N0.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
+ MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
+ (MaskIdx0 + MaskLen0 <= ValBits)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 2nd pattern to match BSTRINS:
+ // R = or (and X, mask0), (shl (and Y, mask1), lsb)
+ // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
+ // =>
+ // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
+ if (N0.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
+ MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
+ (MaskIdx0 + MaskLen0 <= ValBits)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 3rd pattern to match BSTRINS:
+ // R = or (and X, mask0), (and Y, mask1)
+ // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
+ // =>
+ // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
+ // where msb = lsb + size - 1
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ (MaskIdx0 + MaskLen0 <= 64) &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
+ (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
+ DAG.getConstant(MaskIdx0, DL, GRLenVT)),
+ DAG.getConstant(ValBits == 32
+ ? (MaskIdx0 + (MaskLen0 & 31) - 1)
+ : (MaskIdx0 + MaskLen0 - 1),
+ DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 4th pattern to match BSTRINS:
+ // R = or (and X, mask), (shl Y, shamt)
+ // where mask = (2**shamt - 1)
+ // =>
+ // R = BSTRINS X, Y, ValBits - 1, shamt
+ // where ValBits = 32 or 64
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
+ MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ (Shamt = CN1->getZExtValue()) == MaskLen0 &&
+ (MaskIdx0 + MaskLen0 <= ValBits)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ N1.getOperand(0),
+ DAG.getConstant((ValBits - 1), DL, GRLenVT),
+ DAG.getConstant(Shamt, DL, GRLenVT));
+ }
+
+ // 5th pattern to match BSTRINS:
+ // R = or (and X, mask), const
+ // where ~mask = (2**size - 1) << lsb, mask & const = 0
+ // =>
+ // R = BSTRINS X, (const >> lsb), msb, lsb
+ // where msb = lsb + size - 1
+ if (N0.getOpcode() == ISD::AND &&
+ (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
+ isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
+ (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
+ (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
+ return DAG.getNode(
+ LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
+ DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
+ DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
+ DAG.getConstant(MaskIdx0, DL, GRLenVT));
+ }
+
+ // 6th pattern.
+ // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
+ // by the incoming bits are known to be zero.
+ // =>
+ // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
+ //
+  // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
+  // pattern is more common than the 1st. So we put the 1st before the 6th in
+  // order to match as many nodes as possible.
+ ConstantSDNode *CNMask, *CNShamt;
+ unsigned MaskIdx, MaskLen;
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
+ (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
+ MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ CNShamt->getZExtValue() + MaskLen <= ValBits) {
+ Shamt = CNShamt->getZExtValue();
+ APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
+ if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
+ DAG.getConstant(Shamt, DL, GRLenVT));
+ }
+ }
+
+ // 7th pattern.
+ // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
+ // overwritten by the incoming bits are known to be zero.
+ // =>
+ // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
+ //
+ // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
+ // before the 7th in order to match as many nodes as possible.
+ if (N1.getOpcode() == ISD::AND &&
+ (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
+ N1.getOperand(0).getOpcode() == ISD::SHL &&
+ (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
+ CNShamt->getZExtValue() == MaskIdx) {
+ APInt ShMask(ValBits, CNMask->getZExtValue());
+ if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
+ N1.getOperand(0).getOperand(0),
+ DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
+ DAG.getConstant(MaskIdx, DL, GRLenVT));
+ }
+ }
+
+ // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
+ if (!SwapAndRetried) {
+ std::swap(N0, N1);
+ SwapAndRetried = true;
+ goto Retry;
+ }
+
+ SwapAndRetried = false;
+Retry2:
+ // 8th pattern.
+ // a = b | (c & shifted_mask), where all positions in b to be overwritten by
+ // the incoming bits are known to be zero.
+ // =>
+ // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
+ //
+  // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
+  // we put it here in order to match as many nodes as possible and generate
+  // fewer instructions.
+ if (N1.getOpcode() == ISD::AND &&
+ (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
+ isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
+ APInt ShMask(ValBits, CNMask->getZExtValue());
+ if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
+ LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
+ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
+ DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
+ N1->getOperand(0),
+ DAG.getConstant(MaskIdx, DL, GRLenVT)),
+ DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
+ DAG.getConstant(MaskIdx, DL, GRLenVT));
+ }
+ }
+ // Swap N0/N1 and retry.
+ if (!SwapAndRetried) {
+ std::swap(N0, N1);
+ SwapAndRetried = true;
+ goto Retry2;
+ }
+
+ return SDValue();
+}
+
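A concrete instance of the 1st pattern (values illustrative): take size = 8 and lsb = 8, so mask1 = 0xff00 and mask0 = ~0xff00:

    // r = (x & ~0xff00) | ((y << 8) & 0xff00)
    //   => (BSTRINS x, y, /*msb=*/15, /*lsb=*/8)
    // Bits [15:8] of x are replaced by bits [7:0] of y; msb = lsb + size - 1.
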
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -353,12 +730,62 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
+ case ISD::OR:
+ return performORCombine(N, DAG, DCI, Subtarget);
case ISD::SRL:
return performSRLCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
+static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ const TargetInstrInfo &TII) {
+ if (!ZeroDivCheck)
+ return &MBB;
+
+ // Build instructions:
+ // div(or mod) $dst, $dividend, $divisor
+ // bnez $divisor, 8
+ // break 7
+ // fallthrough
+ MachineOperand &Divisor = MI.getOperand(2);
+ auto FallThrough = std::next(MI.getIterator());
+
+ BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BNEZ))
+ .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill()))
+ .addImm(8);
+
+  // See the Linux header arch/loongarch/include/uapi/asm/break.h for the
+  // definition of BRK_DIVZERO.
+ BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BREAK))
+      .addImm(7 /*BRK_DIVZERO*/);
+
+ // Clear Divisor's kill flag.
+ Divisor.setIsKill(false);
+
+ return &MBB;
+}
+
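With -loongarch-check-zero-division enabled, a 32-bit signed division is followed by the guard described in the comment above; a sketch of the emitted sequence:

    //   div.w $a0, $a1, $a2
    //   bnez  $a2, 8       # divisor non-zero: branch over the trap
    //   break 7            # BRK_DIVZERO
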
+MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
+ case LoongArch::DIV_W:
+ case LoongArch::DIV_WU:
+ case LoongArch::MOD_W:
+ case LoongArch::MOD_WU:
+ case LoongArch::DIV_D:
+ case LoongArch::DIV_DU:
+ case LoongArch::MOD_D:
+ case LoongArch::MOD_DU:
+    return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo());
+ }
+}
+
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((LoongArchISD::NodeType)Opcode) {
case LoongArchISD::FIRST_NUMBER:
@@ -369,11 +796,16 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "LoongArchISD::" #node;
// TODO: Add more target-dependent nodes later.
+ NODE_NAME_CASE(CALL)
NODE_NAME_CASE(RET)
NODE_NAME_CASE(SLL_W)
NODE_NAME_CASE(SRA_W)
NODE_NAME_CASE(SRL_W)
+ NODE_NAME_CASE(BSTRINS)
NODE_NAME_CASE(BSTRPICK)
+ NODE_NAME_CASE(MOVGR2FR_W_LA64)
+ NODE_NAME_CASE(MOVFR2GR_S_LA64)
+ NODE_NAME_CASE(FTINT)
}
#undef NODE_NAME_CASE
return nullptr;
@@ -483,6 +915,132 @@ SDValue LoongArchTargetLowering::LowerFormalArguments(
return Chain;
}
+// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
+// and output parameter nodes.
+SDValue
+LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &DL = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ CLI.IsTailCall = false;
+
+ if (IsVarArg)
+ report_fatal_error("LowerCall with varargs not implemented");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Analyze the operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign> ArgLocs;
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+
+ for (auto &Arg : Outs) {
+ if (!Arg.Flags.isByVal())
+ continue;
+ report_fatal_error("Passing arguments byval not implemented");
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
+
+ // Copy argument values to their designated locations.
+ SmallVector<std::pair<Register, SDValue>> RegsToPass;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue ArgValue = OutVals[i];
+
+ // Promote the value if needed.
+ // For now, only handle fully promoted arguments.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ report_fatal_error("Unknown loc info");
+
+ if (VA.isRegLoc()) {
+ // Queue up the argument copies and emit them at the end.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+ } else {
+ report_fatal_error("Passing arguments via the stack not implemented");
+ }
+ }
+
+ SDValue Glue;
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ for (auto &Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
+ Glue = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
+ // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
+  // split it, and so that the direct call can be matched by PseudoCALL.
+ // FIXME: Add target flags for relocation.
+ if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT);
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
+
+ // The first call operand is the chain and the second is the target address.
+ SmallVector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (auto &Reg : RegsToPass)
+ Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Glue the call to the argument copies, if any.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
+ Glue = Chain.getValue(1);
+
+ // Mark the end of the call, which is glued to the call itself.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true),
+ DAG.getConstant(0, DL, PtrVT, true), Glue, DL);
+ Glue = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign> RVLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
+ analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (auto &VA : RVLocs) {
+ // Copy the value out.
+ SDValue RetValue =
+ DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
+ Chain = RetValue.getValue(1);
+ Glue = RetValue.getValue(2);
+
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
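Schematically, a simple call f(a) lowered by this routine yields the glued chain below (node shapes illustrative; register assignment follows CC_LoongArch):

    // callseq_start(NumBytes)
    //   -> CopyToReg $a0, a             // RegsToPass copies, glued in order
    //   -> LoongArchISD::CALL f, $a0    // plus the call-preserved reg mask
    //   -> callseq_end
    //   -> CopyFromReg $a0              // one copy per entry in RVLocs
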
bool LoongArchTargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
@@ -529,3 +1087,14 @@ SDValue LoongArchTargetLowering::LowerReturn(
return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}
+
+bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
+ assert((VT == MVT::f32 || VT == MVT::f64) && "Unexpected VT");
+
+ if (VT == MVT::f32 && !Subtarget.hasBasicF())
+ return false;
+ if (VT == MVT::f64 && !Subtarget.hasBasicD())
+ return false;
+ return (Imm.isZero() || Imm.isExactlyValue(+1.0));
+}
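
This keeps isFPImmLegal consistent with the constant-materialization patterns added to the .td files earlier in the patch; illustratively:

    // Imm == 0.0    -> movgr2fr of $zero     (fpimm0/fpimm0neg patterns)
    // Imm == +1.0   -> ffint of integer 1    (fpimm1 patterns)
    // other values  -> not legal; handled via the constant-pool lowering
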
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index c852577a3744..279550482675 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -27,6 +27,7 @@ enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
// TODO: add more LoongArchISDs
+ CALL,
RET,
// 32-bit shifts, directly matching the semantics of the named LoongArch
// instructions.
@@ -34,6 +35,13 @@ enum NodeType : unsigned {
SRA_W,
SRL_W,
+ // FPR<->GPR transfer operations
+ MOVGR2FR_W_LA64,
+ MOVFR2GR_S_LA64,
+
+ FTINT,
+
+ BSTRINS,
BSTRPICK,
};
@@ -72,6 +80,8 @@ public:
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
private:
/// Target-specific function used to lower LoongArch calling conventions.
@@ -86,8 +96,24 @@ private:
const SmallVectorImpl<ISD::OutputArg> &Outs,
LoongArchCCAssignFn Fn) const;
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const override;
+ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
+
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override {
+ return isa<LoadInst>(I) || isa<StoreInst>(I);
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 146ef53befd5..bcbd4b28f3c7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -12,6 +12,7 @@
#include "LoongArchInstrInfo.h"
#include "LoongArch.h"
+#include "LoongArchMachineFunctionInfo.h"
using namespace llvm;
@@ -19,8 +20,8 @@ using namespace llvm;
#include "LoongArchGenInstrInfo.inc"
LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI)
- // FIXME: add CFSetup and CFDestroy Inst when we implement function call.
- : LoongArchGenInstrInfo() {}
+ : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN,
+ LoongArch::ADJCALLSTACKUP) {}
void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -47,3 +48,68 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
+
+void LoongArchInstrInfo::storeRegToStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
+ bool IsKill, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ unsigned Opcode;
+ if (LoongArch::GPRRegClass.hasSubClassEq(RC))
+ Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+ ? LoongArch::ST_W
+ : LoongArch::ST_D;
+ else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::FST_S;
+ else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::FST_D;
+ else
+ llvm_unreachable("Can't store this register to stack slot");
+
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+ BuildMI(MBB, I, DL, get(Opcode))
+ .addReg(SrcReg, getKillRegState(IsKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+void LoongArchInstrInfo::loadRegFromStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
+ int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ unsigned Opcode;
+ if (LoongArch::GPRRegClass.hasSubClassEq(RC))
+ Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+ ? LoongArch::LD_W
+ : LoongArch::LD_D;
+ else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::FLD_S;
+ else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::FLD_D;
+ else
+ llvm_unreachable("Can't load this register from stack slot");
+
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+ BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index f31943b85a51..0a8c86a5e0c2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -30,6 +30,16 @@ public:
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register SrcReg,
+ bool IsKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register DstReg,
+ int FrameIndex, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 6b8ee9e43f94..d07d086bd7da 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -14,22 +14,45 @@
// LoongArch specific DAG Nodes.
//===----------------------------------------------------------------------===//
+// Target-independent type requirements, but with target-specific formats.
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
// Target-dependent type requirements.
+def SDT_LoongArchCall : SDTypeProfile<0, -1, [SDTCisVT<0, GRLenVT>]>;
def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
]>;
+def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>,
+ SDTCisSameAs<3, 4>
+]>;
+
def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [
SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3>
]>;
// TODO: Add LoongArch specific DAG Nodes
+// Target-independent nodes, but with target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
// Target-dependent nodes.
+def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_bstrins
+ : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
def loongarch_bstrpick
: SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
@@ -106,7 +129,14 @@ def simm16 : Operand<GRLenVT> {
let DecoderMethod = "decodeSImmOperand<16>";
}
-def simm16_lsl2 : Operand<GRLenVT> {
+def simm16_lsl2 : Operand<GRLenVT>,
+ ImmLeaf<GRLenVT, [{return isInt<16>(Imm>>2);}]> {
+ let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
+ let EncoderMethod = "getImmOpValueAsr2";
+ let DecoderMethod = "decodeSImmOperand<16, 2>";
+}
+
+def simm16_lsl2_br : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
let EncoderMethod = "getImmOpValueAsr2";
let DecoderMethod = "decodeSImmOperand<16, 2>";
@@ -117,13 +147,13 @@ def simm20 : Operand<GRLenVT> {
let DecoderMethod = "decodeSImmOperand<20>";
}
-def simm21_lsl2 : Operand<GRLenVT> {
+def simm21_lsl2 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<21, "lsl2">;
let EncoderMethod = "getImmOpValueAsr2";
let DecoderMethod = "decodeSImmOperand<21, 2>";
}
-def simm26_lsl2 : Operand<GRLenVT> {
+def simm26_lsl2 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<26, "lsl2">;
let EncoderMethod = "getImmOpValueAsr2";
let DecoderMethod = "decodeSImmOperand<26, 2>";
@@ -141,6 +171,24 @@ def NegImm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+// FP immediate patterns.
+def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>;
+def fpimm0neg : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>;
+def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>;
+
+def CallSymbol: AsmOperandClass {
+ let Name = "CallSymbol";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImm";
+}
+
+// A bare symbol used in call only.
+def call_symbol : Operand<iPTR> {
+ let ParserMatchClass = CallSymbol;
+}
+
+def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
@@ -185,7 +233,7 @@ class RDTIME_2R<bits<22> op, string opstr>
: Fmt2R<op, (outs GPR:$rd, GPR:$rj), (ins), opstr, "$rd, $rj">;
class BrCC_2RI16<bits<6> op, string opstr>
- : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2:$imm16), opstr,
+ : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2_br:$imm16), opstr,
"$rj, $rd, $imm16"> {
let isBranch = 1;
let isTerminator = 1;
@@ -274,10 +322,12 @@ def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>;
def MUL_W : ALU_3R<0b00000000000111000, "mul.w">;
def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">;
def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">;
+let usesCustomInserter = true in {
def DIV_W : ALU_3R<0b00000000001000000, "div.w">;
def MOD_W : ALU_3R<0b00000000001000001, "mod.w">;
def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">;
def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">;
+} // usesCustomInserter = true
// Bit-shift Instructions
def SLL_W : ALU_3R<0b00000000000101110, "sll.w">;
@@ -379,10 +429,12 @@ def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">;
def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">;
def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">;
def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">;
+let usesCustomInserter = true in {
def DIV_D : ALU_3R<0b00000000001000100, "div.d">;
def MOD_D : ALU_3R<0b00000000001000101, "mod.d">;
def DIV_DU : ALU_3R<0b00000000001000110, "div.du">;
def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">;
+} // usesCustomInserter = true
// Bit-shift Instructions for 64-bits
def SLL_D : ALU_3R<0b00000000000110001, "sll.d">;
@@ -545,6 +597,9 @@ def shiftMaskGRLen
: ComplexPattern<GRLenVT, 1, "selectShiftMaskGRLen", [], [], 0>;
def shiftMask32 : ComplexPattern<i64, 1, "selectShiftMask32", [], [], 0>;
+def sexti32 : ComplexPattern<i64, 1, "selectSExti32">;
+def zexti32 : ComplexPattern<i64, 1, "selectZExti32">;
+
class shiftop<SDPatternOperator operator>
: PatFrag<(ops node:$val, node:$count),
(operator node:$val, (GRLenVT (shiftMaskGRLen node:$count)))>;
@@ -556,6 +611,13 @@ let Predicates = [IsLA32] in {
def : PatGprGpr<add, ADD_W>;
def : PatGprImm<add, ADDI_W, simm12>;
def : PatGprGpr<sub, SUB_W>;
+def : PatGprGpr<sdiv, DIV_W>;
+def : PatGprGpr<udiv, DIV_WU>;
+def : PatGprGpr<srem, MOD_W>;
+def : PatGprGpr<urem, MOD_WU>;
+def : PatGprGpr<mul, MUL_W>;
+def : PatGprGpr<mulhs, MULH_W>;
+def : PatGprGpr<mulhu, MULH_WU>;
} // Predicates = [IsLA32]
let Predicates = [IsLA64] in {
@@ -565,6 +627,24 @@ def : PatGprImm<add, ADDI_D, simm12>;
def : PatGprImm_32<add, ADDI_W, simm12>;
def : PatGprGpr<sub, SUB_D>;
def : PatGprGpr_32<sub, SUB_W>;
+def : PatGprGpr<sdiv, DIV_D>;
+def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr<srem, MOD_D>;
+def : PatGprGpr<urem, MOD_DU>;
+// TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the
+// product are used.
+def : PatGprGpr<mul, MUL_D>;
+def : PatGprGpr<mulhs, MULH_D>;
+def : PatGprGpr<mulhu, MULH_DU>;
+// Select MULW_D_W for calculating the full 64-bit product of i32xi32 signed
+// multiplication.
+def : Pat<(i64 (mul (sext_inreg GPR:$rj, i32), (sext_inreg GPR:$rk, i32))),
+ (MULW_D_W GPR:$rj, GPR:$rk)>;
+// Select MULW_D_WU for calculating the full 64-bit product of i32xi32
+// unsigned multiplication.
+def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)),
+ (loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))),
+ (MULW_D_WU GPR:$rj, GPR:$rk)>;
} // Predicates = [IsLA64]
def : PatGprGpr<and, AND>;
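
The two MULW patterns above correspond to the usual widening-multiply idiom in
C; a sketch of source code that should now select a single mulw.d.w (signed)
or mulw.d.wu (unsigned) on LA64 (illustrative, not a verified codegen test):

// Signed: the sign-extensions become sext_inreg and match the MULW_D_W
// pattern above.
long long mul_s(int a, int b) { return (long long)a * b; }

// Unsigned: the zero-extensions are represented as BSTRPICK ..., 31, 0 and
// match the MULW_D_WU pattern above.
unsigned long long mul_u(unsigned a, unsigned b) {
  return (unsigned long long)a * b;
}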
@@ -649,19 +729,143 @@ def : Pat<(select GPR:$cond, GPR:$t, GPR:$f),
/// Branches and jumps
+class BccPat<PatFrag CondOp, LAInst Inst>
+ : Pat<(brcond (GRLenVT (CondOp GPR:$rj, GPR:$rd)), bb:$imm16),
+ (Inst GPR:$rj, GPR:$rd, bb:$imm16)>;
+
+def : BccPat<seteq, BEQ>;
+def : BccPat<setne, BNE>;
+def : BccPat<setlt, BLT>;
+def : BccPat<setge, BGE>;
+def : BccPat<setult, BLTU>;
+def : BccPat<setuge, BGEU>;
+
+class BccSwapPat<PatFrag CondOp, LAInst InstBcc>
+ : Pat<(brcond (GRLenVT (CondOp GPR:$rd, GPR:$rj)), bb:$imm16),
+ (InstBcc GPR:$rj, GPR:$rd, bb:$imm16)>;
+
+// Condition codes that don't have matching LoongArch branch instructions, but
+// are trivially supported by swapping the two input operands.
+def : BccSwapPat<setgt, BLT>;
+def : BccSwapPat<setle, BGE>;
+def : BccSwapPat<setugt, BLTU>;
+def : BccSwapPat<setule, BGEU>;
+
+// An extra pattern is needed for a brcond without a setcc (i.e. where the
+// condition was calculated elsewhere).
+def : Pat<(brcond GPR:$rj, bb:$imm21), (BNEZ GPR:$rj, bb:$imm21)>;
+
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in
+def PseudoBR : Pseudo<(outs), (ins simm26_lsl2:$imm26), [(br bb:$imm26)]>,
+ PseudoInstExpansion<(B simm26_lsl2:$imm26)>;
+
+let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
+def PseudoBRIND : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16), []>,
+ PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>;
+
+def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>;
+def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)),
+ (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>;
+
+let isCall = 1, Defs = [R1] in
+def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
+ let AsmString = "bl\t$func";
+}
+
+def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
+def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
+
+let isCall = 1, Defs = [R1] in
+def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj),
+ [(loongarch_call GPR:$rj)]>,
+ PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>;
+
let isBarrier = 1, isReturn = 1, isTerminator = 1 in
def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
PseudoInstExpansion<(JIRL R0, R1, 0)>;
-/// BSTRPICK
+/// BSTRINS and BSTRPICK
-let Predicates = [IsLA32] in
+let Predicates = [IsLA32] in {
+def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
+ (BSTRINS_W GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
(BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
+} // Predicates = [IsLA32]
-let Predicates = [IsLA64] in
+let Predicates = [IsLA64] in {
+def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
+ (BSTRINS_D GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
(BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
+} // Predicates = [IsLA64]
+
+/// Loads
+
+multiclass LdPat<PatFrag LoadOp, LAInst Inst, ValueType vt = GRLenVT> {
+ def : Pat<(vt (LoadOp BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>;
+ def : Pat<(vt (LoadOp (add BaseAddr:$rj, simm12:$imm12))),
+ (Inst BaseAddr:$rj, simm12:$imm12)>;
+}
+
+defm : LdPat<sextloadi8, LD_B>;
+defm : LdPat<extloadi8, LD_B>;
+defm : LdPat<sextloadi16, LD_H>;
+defm : LdPat<extloadi16, LD_H>;
+defm : LdPat<load, LD_W>, Requires<[IsLA32]>;
+defm : LdPat<zextloadi8, LD_BU>;
+defm : LdPat<zextloadi16, LD_HU>;
+let Predicates = [IsLA64] in {
+defm : LdPat<sextloadi32, LD_W, i64>;
+defm : LdPat<extloadi32, LD_W, i64>;
+defm : LdPat<zextloadi32, LD_WU, i64>;
+defm : LdPat<load, LD_D, i64>;
+} // Predicates = [IsLA64]
+
+/// Stores
+
+multiclass StPat<PatFrag StoreOp, LAInst Inst, RegisterClass StTy,
+ ValueType vt> {
+ def : Pat<(StoreOp (vt StTy:$rd), BaseAddr:$rj),
+ (Inst StTy:$rd, BaseAddr:$rj, 0)>;
+ def : Pat<(StoreOp (vt StTy:$rd), (add BaseAddr:$rj, simm12:$imm12)),
+ (Inst StTy:$rd, BaseAddr:$rj, simm12:$imm12)>;
+}
+
+defm : StPat<truncstorei8, ST_B, GPR, GRLenVT>;
+defm : StPat<truncstorei16, ST_H, GPR, GRLenVT>;
+defm : StPat<store, ST_W, GPR, i32>, Requires<[IsLA32]>;
+let Predicates = [IsLA64] in {
+defm : StPat<truncstorei32, ST_W, GPR, i64>;
+defm : StPat<store, ST_D, GPR, i64>;
+} // Predicates = [IsLA64]
+
+/// Atomic loads and stores
+
+def : Pat<(atomic_fence timm, timm), (DBAR 0)>;
+
+defm : LdPat<atomic_load_8, LD_B>;
+defm : LdPat<atomic_load_16, LD_H>;
+defm : LdPat<atomic_load_32, LD_W>;
+
+defm : StPat<atomic_store_8, ST_B, GPR, GRLenVT>;
+defm : StPat<atomic_store_16, ST_H, GPR, GRLenVT>;
+defm : StPat<atomic_store_32, ST_W, GPR, i32>, Requires<[IsLA32]>;
+let Predicates = [IsLA64] in {
+defm : LdPat<atomic_load_64, LD_D>;
+defm : StPat<atomic_store_32, ST_W, GPR, i64>;
+defm : StPat<atomic_store_64, ST_D, GPR, i64>;
+} // Predicates = [IsLA64]
+
+/// Other pseudo-instructions
+
+// Pessimistically assume the stack pointer will be clobbered
+let Defs = [R3], Uses = [R3] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(callseq_start timm:$amt1, timm:$amt2)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+} // Defs = [R3], Uses = [R3]
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
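
One detail of the branch lowering added above that is easy to miss: LoongArch
only provides blt/bge (plus their unsigned variants), so the BccSwapPat
patterns synthesize the remaining four condition codes by swapping the input
operands. For example (illustrative only):

// brcond(setgt a, b) has no direct instruction; BccSwapPat selects
// "blt b, a" instead, and likewise "a <= b" becomes "bge b, a".
int pick_max(int a, int b) { return a > b ? a : b; }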
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
index 7416c93b4d05..488c66f47863 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -22,6 +22,22 @@
using namespace llvm;
+static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
+ const AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+
+ // TODO: Process target flags.
+
+ const MCExpr *ME =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
+ ME = MCBinaryExpr::createAdd(
+ ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+
+ return MCOperand::createExpr(ME);
+}
+
bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &MCOp,
const AsmPrinter &AP) {
@@ -41,12 +57,21 @@ bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
case MachineOperand::MO_Immediate:
MCOp = MCOperand::createImm(MO.getImm());
break;
- // TODO: lower special operands
- case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+ break;
case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(MO, AP.getSymbolPreferLocal(*MO.getGlobal()), AP);
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), AP);
+ break;
case MachineOperand::MO_ExternalSymbol:
- case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(
+ MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
+ break;
+ // TODO: lower special operands
+ case MachineOperand::MO_BlockAddress:
case MachineOperand::MO_JumpTableIndex:
break;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
index b9bae8e56304..05902ebb7ba6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -110,6 +110,28 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const {
+ // TODO: This implementation is a temporary placeholder which does just
+ // enough to allow other aspects of code generation to be tested.
+
assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
- // TODO: Implement this when we have function calls
+
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ DebugLoc DL = MI.getDebugLoc();
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ Register FrameReg;
+ StackOffset Offset =
+ TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) +
+ StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
+
+ // Offsets must be encodable with a 12-bit immediate field.
+ if (!isInt<12>(Offset.getFixed())) {
+ report_fatal_error("Frame offsets outside of the signed 12-bit range is "
+ "not supported currently");
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
}
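
For reference, isInt<12> bounds the byte offset to the signed 12-bit immediate
field of the load/store encodings, i.e. [-2048, 2047]. A compile-time check
against the real helper, assuming LLVM headers:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isInt<12>(-2048), "lower bound of si12");
static_assert(llvm::isInt<12>(2047), "upper bound of si12");
static_assert(!llvm::isInt<12>(2048), "out of range");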
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 3a1a46a9e624..468c4f43cb90 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -102,6 +102,7 @@ public:
return getTM<LoongArchTargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
};
} // namespace
@@ -111,6 +112,12 @@ LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) {
return new LoongArchPassConfig(*this, PM);
}
+void LoongArchPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass());
+
+ TargetPassConfig::addIRPasses();
+}
+
bool LoongArchPassConfig::addInstSelector() {
addPass(createLoongArchISelDag(getLoongArchTargetMachine()));
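
Taken together with the shouldInsertFencesForAtomic() hook and the
(atomic_fence timm, timm) -> (DBAR 0) pattern added earlier in this commit,
running AtomicExpand here means sufficiently ordered atomic loads and stores
get bracketed by fences that lower to dbar. A source-level example of what
exercises that path (a sketch of the intended lowering, not a verified
codegen test):

#include <atomic>

// After AtomicExpand, the seq_cst load below becomes an ordinary ld.w
// surrounded by fence instructions, each of which selects to "dbar 0".
int read_flag(const std::atomic<int> &Flag) {
  return Flag.load(std::memory_order_seq_cst);
}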
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index c733c194e6a2..e50761ab1e27 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Compiler.h"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "LoongArchGenInstrInfo.inc"
#define GET_REGINFO_MC_DESC
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
index e576b9a49cd6..a606ccdbc47c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
@@ -46,6 +46,7 @@ createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
// Defines symbolic names for LoongArch instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "LoongArchGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
index 3bcce9e3ba3b..4933d40f3388 100644
--- a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
+++ b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
@@ -77,6 +77,9 @@ bool M68kAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
void M68kAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ M68k_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
switch (MI->getOpcode()) {
default: {
if (MI->isPseudo()) {
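
This call pattern, repeated below for MSP430, NVPTX, PowerPC and RISC-V (and
left commented out for Mips), moves instruction-predicate verification from
the MC code emitter into the AsmPrinter, so it also runs when emitting
assembly rather than only object code. Conceptually the generated helper
reduces to a feature-subset check; a simplified standalone model (names
hypothetical, not the actual TableGen output):

#include <bitset>
#include <cassert>

using FeatureBits = std::bitset<64>;

// Every feature an opcode requires must be enabled in the subtarget;
// emitting an instruction otherwise is a programmer error.
void verifyInstructionPredicates(const FeatureBits &Required,
                                 const FeatureBits &Enabled) {
  assert((Required & ~Enabled).none() &&
         "instruction requires a subtarget feature that is not enabled");
}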
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
index 2606e22410fc..e6290d4cbec5 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
@@ -31,6 +31,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "M68kGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
index 0dc601ad876b..2a1cc678016a 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
@@ -52,6 +52,7 @@ std::unique_ptr<MCObjectTargetWriter> createM68kELFObjectWriter(uint8_t OSABI);
// Defines symbolic names for the M68k instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "M68kGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 3f006056955d..13a880de68b5 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -22,6 +22,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "MSP430GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 24b0b3298592..e596c3f1ce46 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -53,6 +53,7 @@ createMSP430ELFObjectWriter(uint8_t OSABI);
// Defines symbolic names for the MSP430 instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "MSP430GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 85c59d5b14b5..9cd2cbe89e46 100644
--- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -149,6 +149,9 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
//===----------------------------------------------------------------------===//
void MSP430AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ MSP430_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
MSP430MCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 6fc8fcb482cd..40c807082fdc 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -36,6 +36,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "MipsGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 8531177ee924..d51f3b9abcfd 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -55,6 +55,7 @@ StringRef selectMipsCPU(const Triple &TT, StringRef CPU);
// Defines symbolic names for the Mips instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "MipsGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 9330a791a7cc..fcaf450cc511 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -181,6 +181,10 @@ static void emitDirectiveRelocJalr(const MachineInstr &MI,
}
void MipsAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // FIXME: Enable feature predicate checks once all the tests pass.
+ // Mips_MC::verifyInstructionPredicates(MI->getOpcode(),
+ // getSubtargetInfo().getFeatureBits());
+
MipsTargetStreamer &TS = getTargetStreamer();
unsigned Opc = MI->getOpcode();
TS.forbidModuleDirective();
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 856d03f0b210..0ba29fb48b05 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -23,6 +23,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "NVPTXGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index b394566edd0d..78f4e6745502 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -21,6 +21,7 @@
// Defines symbolic names for the PTX instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "NVPTXGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 41e9f375e536..8c92766faecb 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -183,6 +183,7 @@ enum CmpMode {
// Defines symbolic names for the NVPTX instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "NVPTXGenInstrInfo.inc"
#endif
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index b1d842122060..9977d8ba0300 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -139,6 +139,9 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV,
}
void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
MCInst Inst;
lowerToMCInst(MI, Inst);
EmitToStreamer(*OutStreamer, Inst);
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 2201eb19c80f..b4f7a64f144a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -270,10 +270,6 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
// ShuffleVector
return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1],
NewOperands[2]);
- case Instruction::InsertValue:
- // InsertValueConstantExpr
- return Builder.CreateInsertValue(NewOperands[0], NewOperands[1],
- C->getIndices());
case Instruction::GetElementPtr:
// GetElementPtrConstantExpr
return Builder.CreateGEP(cast<GEPOperator>(C)->getSourceElementType(),
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 746f652bfa36..6ad016dfa0a7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1861,7 +1861,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Ret.getValue(2);
if (ProxyRegTruncates[i]) {
- Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
+ Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].value(), Ret);
}
InVals.push_back(Ret);
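
The one-line change above tracks llvm::Optional converging on the
std::optional spelling: value() replaces getValue(). Usage is otherwise
unchanged (a trivial check, assuming LLVM headers):

#include "llvm/ADT/Optional.h"
#include <cassert>

int main() {
  llvm::Optional<int> O = 42;
  assert(O && O.value() == 42); // value() is the std::optional-style spelling
  return 0;
}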
diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index eeedce2d99cb..202134ed7035 100644
--- a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -35,6 +35,8 @@ public:
bool runOnFunction(Function &F) override;
+ StringRef getPassName() const override { return "NVPTX Image Optimizer"; }
+
private:
bool replaceIsTypePSampler(Instruction &I);
bool replaceIsTypePSurface(Instruction &I);
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 16fbe1a65562..7929bd2e0df0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -36,6 +36,8 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return "NVPTX Prolog Epilog Pass"; }
+
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
};
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 2d6d72777db2..4e41515b997d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -18,7 +18,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include <algorithm>
#include <cstring>
@@ -32,19 +31,27 @@ namespace llvm {
namespace {
typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
-typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
-} // anonymous namespace
-static ManagedStatic<per_module_annot_t> annotationCache;
-static sys::Mutex Lock;
+struct AnnotationCache {
+ sys::Mutex Lock;
+ std::map<const Module *, global_val_annot_t> Cache;
+};
+
+AnnotationCache &getAnnotationCache() {
+ static AnnotationCache AC;
+ return AC;
+}
+} // anonymous namespace
void clearAnnotationCache(const Module *Mod) {
- std::lock_guard<sys::Mutex> Guard(Lock);
- annotationCache->erase(Mod);
+ auto &AC = getAnnotationCache();
+ std::lock_guard<sys::Mutex> Guard(AC.Lock);
+ AC.Cache.erase(Mod);
}
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
- std::lock_guard<sys::Mutex> Guard(Lock);
+ auto &AC = getAnnotationCache();
+ std::lock_guard<sys::Mutex> Guard(AC.Lock);
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
// start index = 1, to skip the global variable key
@@ -70,7 +77,8 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
}
static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
- std::lock_guard<sys::Mutex> Guard(Lock);
+ auto &AC = getAnnotationCache();
+ std::lock_guard<sys::Mutex> Guard(AC.Lock);
NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations");
if (!NMD)
return;
@@ -93,40 +101,42 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
if (tmp.empty()) // no annotations for this gv
return;
- if ((*annotationCache).find(m) != (*annotationCache).end())
- (*annotationCache)[m][gv] = std::move(tmp);
+ if (AC.Cache.find(m) != AC.Cache.end())
+ AC.Cache[m][gv] = std::move(tmp);
else {
global_val_annot_t tmp1;
tmp1[gv] = std::move(tmp);
- (*annotationCache)[m] = std::move(tmp1);
+ AC.Cache[m] = std::move(tmp1);
}
}
bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
unsigned &retval) {
- std::lock_guard<sys::Mutex> Guard(Lock);
+ auto &AC = getAnnotationCache();
+ std::lock_guard<sys::Mutex> Guard(AC.Lock);
const Module *m = gv->getParent();
- if ((*annotationCache).find(m) == (*annotationCache).end())
+ if (AC.Cache.find(m) == AC.Cache.end())
cacheAnnotationFromMD(m, gv);
- else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+ else if (AC.Cache[m].find(gv) == AC.Cache[m].end())
cacheAnnotationFromMD(m, gv);
- if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+ if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end())
return false;
- retval = (*annotationCache)[m][gv][prop][0];
+ retval = AC.Cache[m][gv][prop][0];
return true;
}
bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
std::vector<unsigned> &retval) {
- std::lock_guard<sys::Mutex> Guard(Lock);
+ auto &AC = getAnnotationCache();
+ std::lock_guard<sys::Mutex> Guard(AC.Lock);
const Module *m = gv->getParent();
- if ((*annotationCache).find(m) == (*annotationCache).end())
+ if (AC.Cache.find(m) == AC.Cache.end())
cacheAnnotationFromMD(m, gv);
- else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+ else if (AC.Cache[m].find(gv) == AC.Cache[m].end())
cacheAnnotationFromMD(m, gv);
- if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+ if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end())
return false;
- retval = (*annotationCache)[m][gv][prop];
+ retval = AC.Cache[m][gv][prop];
return true;
}
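
The ManagedStatic removal above follows a general recipe: a function-local
static inside an accessor provides the same lazy, thread-safe initialization
(guaranteed since C++11) without ManagedStatic's global shutdown machinery.
The shape in isolation:

#include <map>
#include <mutex>
#include <string>

struct Cache {
  std::mutex Lock;
  std::map<std::string, int> Data;
};

// Constructed on first call; C++11 guarantees the initialization itself is
// thread-safe, so no separate once-flag is needed.
static Cache &getCache() {
  static Cache C;
  return C;
}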
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 46bbc44e1681..fa9e69f2e607 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -449,12 +449,9 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
-void PPCMCCodeEmitter::encodeInstruction(
- const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
+void PPCMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
// Output the constant in big/little endian byte order.
@@ -492,5 +489,4 @@ bool PPCMCCodeEmitter::isPrefixedInstruction(const MCInst &MI) const {
return InstrInfo->isPrefixed(Opcode);
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index 39b2f1211f29..c4d4d35a6665 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -121,12 +121,6 @@ public:
// Is this instruction a prefixed instruction.
bool isPrefixedInstruction(const MCInst &MI) const;
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a651362f703b..1008dc63d064 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -48,6 +48,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index acb860e16518..3ca6f394f60b 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -118,6 +118,7 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
//
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_SCHED_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "PPCGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 22f35c8fa8d3..58a75baf8081 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -230,6 +230,9 @@ private:
void emitGlobalVariableHelper(const GlobalVariable *);
+ // Get the offset of an alias based on its AliaseeObject.
+ uint64_t getAliasOffset(const Constant *C);
+
public:
PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: PPCAsmPrinter(TM, std::move(Streamer)) {
@@ -656,6 +659,9 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
/// the current output stream.
///
void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ PPC_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
MCInst TmpInst;
const bool IsPPC64 = Subtarget->isPPC64();
const bool IsAIX = Subtarget->isAIXABI();
@@ -2352,6 +2358,24 @@ static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
.Default(false);
}
+uint64_t PPCAIXAsmPrinter::getAliasOffset(const Constant *C) {
+ if (auto *GA = dyn_cast<GlobalAlias>(C))
+ return getAliasOffset(GA->getAliasee());
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ const MCExpr *LowC = lowerConstant(CE);
+ const MCBinaryExpr *CBE = dyn_cast<MCBinaryExpr>(LowC);
+ if (!CBE)
+ return 0;
+ if (CBE->getOpcode() != MCBinaryExpr::Add)
+ report_fatal_error("Only adding an offset is supported now.");
+ auto *RHS = dyn_cast<MCConstantExpr>(CBE->getRHS());
+ if (!RHS)
+ report_fatal_error("Unable to get the offset of alias.");
+ return RHS->getValue();
+ }
+ return 0;
+}
+
void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
// Special LLVM global arrays have been handled at the initialization.
if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV))
@@ -2422,20 +2446,34 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
}
MCSymbol *EmittedInitSym = GVSym;
+
+ // Emit linkage for the global variable and its aliases.
emitLinkage(GV, EmittedInitSym);
+ for (const GlobalAlias *GA : GOAliasMap[GV])
+ emitLinkage(GA, getSymbol(GA));
+
emitAlignment(getGVAlignment(GV, DL), GV);
// When -fdata-sections is enabled, every GlobalVariable will
// be put into its own csect; therefore, label is not necessary here.
- if (!TM.getDataSections() || GV->hasSection()) {
+ if (!TM.getDataSections() || GV->hasSection())
OutStreamer->emitLabel(EmittedInitSym);
+
+ // No aliases to emit.
+ if (!GOAliasMap[GV].size()) {
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+ return;
}
- // Emit aliasing label for global variable.
- for (const GlobalAlias *Alias : GOAliasMap[GV])
- OutStreamer->emitLabel(getSymbol(Alias));
+ // Aliases with the same offset should be aligned. Record the list of aliases
+ // associated with the offset.
+ AliasMapTy AliasList;
+ for (const GlobalAlias *GA : GOAliasMap[GV])
+ AliasList[getAliasOffset(GA->getAliasee())].push_back(GA);
- emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+ // Emit alias label and element value for global variable.
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ &AliasList);
}
void PPCAIXAsmPrinter::emitFunctionDescriptor() {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5b9d1e66b04e..3c461a627d61 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -392,8 +392,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// MASS transformation for LLVM intrinsics with replicating fast-math flag
// to be consistent to PPCGenScalarMASSEntries pass
- if (TM.getOptLevel() == CodeGenOpt::Aggressive &&
- TM.Options.PPCGenScalarMASSEntries) {
+ if (TM.getOptLevel() == CodeGenOpt::Aggressive) {
setOperationAction(ISD::FSIN , MVT::f64, Custom);
setOperationAction(ISD::FCOS , MVT::f64, Custom);
setOperationAction(ISD::FPOW , MVT::f64, Custom);
@@ -17886,13 +17885,17 @@ bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
return Op.getNode()->getFlags().hasApproximateFuncs();
}
+bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
+ return getTargetMachine().Options.PPCGenScalarMASSEntries;
+}
+
SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
const char *LibCallFloatName,
const char *LibCallDoubleNameFinite,
const char *LibCallFloatNameFinite,
SDValue Op,
SelectionDAG &DAG) const {
- if (!isLowringToMASSSafe(Op))
+ if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
return SDValue();
if (!isLowringToMASSFiniteSafe(Op))
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f92a117fe27f..4a08cc42fa9d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1293,6 +1293,7 @@ namespace llvm {
SelectionDAG &DAG) const;
bool isLowringToMASSFiniteSafe(SDValue Op) const;
bool isLowringToMASSSafe(SDValue Op) const;
+ bool isScalarMASSConversionEnabled() const;
SDValue lowerLibCallBase(const char *LibCallDoubleName,
const char *LibCallFloatName,
const char *LibCallDoubleNameFinite,
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index fbd487fbcfd5..59e8f3ff84a4 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -43,7 +43,6 @@ namespace {
}
const PPCInstrInfo *TII;
- LiveIntervals *LIS;
protected:
bool processBlock(MachineBasicBlock &MBB) {
@@ -83,11 +82,8 @@ protected:
Register InReg = PPC::NoRegister;
Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
- SmallVector<Register, 3> OrigRegs = {OutReg, GPR3};
- if (!IsPCREL) {
+ if (!IsPCREL)
InReg = MI.getOperand(1).getReg();
- OrigRegs.push_back(InReg);
- }
DebugLoc DL = MI.getDebugLoc();
unsigned Opc1, Opc2;
@@ -139,11 +135,6 @@ protected:
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
.addImm(0);
- // The ADDItls* instruction is the first instruction in the
- // repair range.
- MachineBasicBlock::iterator First = I;
- --First;
-
if (IsAIX) {
// The variable offset and region handle are copied in r4 and r3. The
// copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
@@ -177,16 +168,10 @@ protected:
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
- // The COPY is the last instruction in the repair range.
- MachineBasicBlock::iterator Last = I;
- --Last;
-
// Move past the original instruction and remove it.
++I;
MI.removeFromParent();
- // Repair the live intervals.
- LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
Changed = true;
}
@@ -204,7 +189,6 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
- LIS = &getAnalysis<LiveIntervals>();
bool Changed = false;
@@ -217,9 +201,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 7c062387fecd..a335b2d23394 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -84,12 +84,6 @@ public:
unsigned getVMaskReg(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
@@ -188,9 +182,6 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
// Get byte count of instruction.
unsigned Size = Desc.getSize();
@@ -403,5 +394,4 @@ unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo,
}
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "RISCVGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 917d93479f18..c63e0c8e737d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/ErrorHandling.h"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "RISCVGenInstrInfo.inc"
#define GET_REGINFO_MC_DESC
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index 276fc9efb6c0..d157257d976c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -45,6 +45,7 @@ std::unique_ptr<MCObjectTargetWriter> createRISCVELFObjectWriter(uint8_t OSABI,
// Defines symbolic names for RISC-V instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "RISCVGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 5b2a247ebda0..edd39f6547ed 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -91,6 +91,9 @@ void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
#include "RISCVGenMCPseudoLowering.inc"
void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ RISCV_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 57d8ba6f0161..a7286b2963c2 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -899,7 +899,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
std::pair<int64_t, Align>
-RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
+RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Create a buffer of RVV objects to allocate.
SmallVector<int, 8> ObjectsToAllocate;
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
@@ -912,10 +913,18 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
ObjectsToAllocate.push_back(I);
}
- // Allocate all RVV locals and spills
- int64_t Offset = 0;
// The minimum alignment is 16 bytes.
Align RVVStackAlign(16);
+ const auto &ST = MF.getSubtarget<RISCVSubtarget>();
+
+ if (!ST.hasVInstructions()) {
+ assert(ObjectsToAllocate.empty() &&
+ "Can't allocate scalable-vector objects without V instructions");
+ return std::make_pair(0, RVVStackAlign);
+ }
+
+ // Allocate all RVV locals and spills
+ int64_t Offset = 0;
for (int FI : ObjectsToAllocate) {
// ObjectSize in bytes.
int64_t ObjectSize = MFI.getObjectSize(FI);
@@ -997,7 +1006,7 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
int64_t RVVStackSize;
Align RVVStackAlign;
- std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MFI);
+ std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF);
RVFI->setRVVStackSize(RVVStackSize);
RVFI->setRVVStackAlign(RVVStackAlign);
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 466cd059b749..a5cf68a6ea94 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -84,7 +84,7 @@ private:
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
int64_t Amount, MachineInstr::MIFlag Flag) const;
std::pair<int64_t, Align>
- assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const;
+ assignRVVStackObjectOffsets(MachineFunction &MF) const;
};
-}
+} // namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index cfaafc7b53d2..5b823af1e9b8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -43,92 +43,95 @@ namespace RISCV {
} // namespace llvm
void RISCVDAGToDAGISel::PreprocessISelDAG() {
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end();
- I != E;) {
- SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
- // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
- // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
- if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ if (N->use_empty())
+ continue;
+
+ SDValue Result;
+ switch (N->getOpcode()) {
+ case ISD::SPLAT_VECTOR: {
+ // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
+ // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
MVT VT = N->getSimpleValueType(0);
unsigned Opc =
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
SDLoc DL(N);
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
- SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
- N->getOperand(0), VL);
-
- --I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
- ++I;
- CurDAG->DeleteNode(N);
- continue;
+ Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
+ N->getOperand(0), VL);
+ break;
}
+ case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
+ // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
+ // load. Done after lowering and combining so that we have a chance to
+ // optimize this to VMV_V_X_VL when the upper bits aren't needed.
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands");
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Lo = N->getOperand(1);
+ SDValue Hi = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+ assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
+ Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
+ "Unexpected VTs!");
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ RISCVMachineFunctionInfo *FuncInfo =
+ MF.getInfo<RISCVMachineFunctionInfo>();
+ SDLoc DL(N);
- // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
- // load. Done after lowering and combining so that we have a chance to
- // optimize this to VMV_V_X_VL when the upper bits aren't needed.
- if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
- continue;
-
- assert(N->getNumOperands() == 4 && "Unexpected number of operands");
- MVT VT = N->getSimpleValueType(0);
- SDValue Passthru = N->getOperand(0);
- SDValue Lo = N->getOperand(1);
- SDValue Hi = N->getOperand(2);
- SDValue VL = N->getOperand(3);
- assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
- Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
- "Unexpected VTs!");
- MachineFunction &MF = CurDAG->getMachineFunction();
- RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
- SDLoc DL(N);
-
- // We use the same frame index we use for moving two i32s into 64-bit FPR.
- // This is an analogous operation.
- int FI = FuncInfo->getMoveF64FrameIndex(MF);
- MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
- const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
- SDValue StackSlot =
- CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
+ // We use the same frame index we use for moving two i32s into 64-bit FPR.
+ // This is an analogous operation.
+ int FI = FuncInfo->getMoveF64FrameIndex(MF);
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+ const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
+ SDValue StackSlot =
+ CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
- SDValue Chain = CurDAG->getEntryNode();
- Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
+ SDValue Chain = CurDAG->getEntryNode();
+ Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
- SDValue OffsetSlot =
- CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
- Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
- Align(8));
+ SDValue OffsetSlot =
+ CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+ Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
+ Align(8));
- Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
- SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
- SDValue IntID =
- CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
- SDValue Ops[] = {Chain,
- IntID,
- Passthru,
- StackSlot,
- CurDAG->getRegister(RISCV::X0, MVT::i64),
- VL};
+ SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
+ SDValue IntID =
+ CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
+ SDValue Ops[] = {Chain,
+ IntID,
+ Passthru,
+ StackSlot,
+ CurDAG->getRegister(RISCV::X0, MVT::i64),
+ VL};
- SDValue Result = CurDAG->getMemIntrinsicNode(
- ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
- MachineMemOperand::MOLoad);
+ Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ MVT::i64, MPI, Align(8),
+ MachineMemOperand::MOLoad);
+ break;
+ }
+ }
- // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
- // vlse we created. This will cause general havok on the dag because
- // anything below the conversion could be folded into other existing nodes.
- // To avoid invalidating 'I', back it up to the convert node.
- --I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ if (Result) {
+ LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
+ LLVM_DEBUG(N->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\nNew: ");
+ LLVM_DEBUG(Result->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\n");
- // Now that we did that, the node is dead. Increment the iterator to the
- // next node to process, then delete N.
- ++I;
- CurDAG->DeleteNode(N);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ MadeChange = true;
+ }
}
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
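
The reworked loop visits the node list back to front deliberately:
decrementing Position before touching the node means the held iterator is
never the node being replaced, and replacement nodes (appended at the end of
the list) are never revisited. The same idiom on a plain std::list:

#include <cassert>
#include <list>

int main() {
  std::list<int> Nodes{1, 2, 3, 4};
  auto Pos = Nodes.end();
  while (Pos != Nodes.begin()) {
    auto Cur = --Pos;         // step first, then inspect
    if (*Cur % 2 == 0)
      Pos = Nodes.erase(Cur); // safe: Pos now points past the erased node
  }
  assert((Nodes == std::list<int>{1, 3}));
  return 0;
}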
void RISCVDAGToDAGISel::PostprocessISelDAG() {
@@ -143,7 +146,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
continue;
MadeChange |= doPeepholeSExtW(N);
- MadeChange |= doPeepholeLoadStoreADDI(N);
MadeChange |= doPeepholeMaskedRVV(N);
}
@@ -153,40 +155,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
}
-// Returns true if N is a MachineSDNode that has a reg and simm12 memory
-// operand. The indices of the base pointer and offset are returned in BaseOpIdx
-// and OffsetOpIdx.
-static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
- unsigned &OffsetOpIdx) {
- switch (N->getMachineOpcode()) {
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- BaseOpIdx = 0;
- OffsetOpIdx = 1;
- return true;
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD:
- BaseOpIdx = 1;
- OffsetOpIdx = 2;
- return true;
- }
-
- return false;
-}
-
static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
RISCVMatInt::InstSeq &Seq) {
SDNode *Result = nullptr;
@@ -285,9 +253,7 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
SDValue Chain = Node->getOperand(0);
SDValue Glue;
- SDValue Base;
- SelectBaseAddr(Node->getOperand(CurOp++), Base);
- Operands.push_back(Base); // Base pointer.
+ Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
if (IsStridedOrIndexed) {
Operands.push_back(Node->getOperand(CurOp++)); // Index.
@@ -651,83 +617,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
return;
}
- case ISD::ADD: {
- // Try to select ADD + immediate used as memory addresses to
- // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by
- // doPeepholeLoadStoreADDI.
-
- // LHS should be an immediate.
- auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
- if (!N1C)
- break;
-
- int64_t Offset = N1C->getSExtValue();
- int64_t Lo12 = SignExtend64<12>(Offset);
-
- // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
- if (Lo12 == 0 || isInt<12>(Offset))
- break;
-
- // Don't do this if we can use a pair of ADDIs.
- if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
- break;
-
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits());
-
- Offset -= Lo12;
- // Restore sign bits for RV32.
- if (!Subtarget->is64Bit())
- Offset = SignExtend64<32>(Offset);
-
- // We can fold if the last operation is an ADDI or its an ADDIW that could
- // be treated as an ADDI.
- if (Seq.back().Opc != RISCV::ADDI &&
- !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset)))
- break;
- assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12");
- // Drop the last operation.
- Seq.pop_back();
- assert(!Seq.empty() && "Expected more instructions in sequence");
-
- bool AllPointerUses = true;
- for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
-
- // Is this user a memory instruction that uses a register and immediate
- // that has this ADD as its pointer.
- unsigned BaseOpIdx, OffsetOpIdx;
- if (!User->isMachineOpcode() ||
- !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
- UI.getOperandNo() != BaseOpIdx) {
- AllPointerUses = false;
- break;
- }
-
- // If the memory instruction already has an offset, make sure the combined
- // offset is foldable.
- int64_t MemOffs =
- cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
- MemOffs += Lo12;
- if (!isInt<12>(MemOffs)) {
- AllPointerUses = false;
- break;
- }
- }
-
- if (!AllPointerUses)
- break;
-
- // Emit (ADDI (ADD X, Hi), Lo)
- SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq);
- SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
- Node->getOperand(0), SDValue(Imm, 0));
- SDNode *ADDI =
- CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
- CurDAG->getTargetConstant(Lo12, DL, VT));
- ReplaceNode(Node, ADDI);
- return;
- }
case ISD::SHL: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
@@ -856,10 +745,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C)
break;
- uint64_t C2 = C->getZExtValue();
+ unsigned C2 = C->getZExtValue();
unsigned XLen = Subtarget->getXLen();
- if (!C2 || C2 >= XLen)
- break;
+ assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
uint64_t C1 = N1C->getZExtValue();
@@ -885,10 +773,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
// with c3 leading zeros.
if (!LeftShift && isMask_64(C1)) {
- uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
- if (C2 < C3) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+ if (C2 < Leading) {
// If the number of leading zeros is C2+32 this can be SRLIW.
- if (C2 + 32 == C3) {
+ if (C2 + 32 == Leading) {
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SRLIW);
@@ -900,7 +788,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
//
// This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
// legalized and goes through DAG combine.
- if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
+ if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
SDNode *SRAIW =
@@ -908,25 +796,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(31, DL, VT));
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
- CurDAG->getTargetConstant(C3 - 32, DL, VT));
+ CurDAG->getTargetConstant(Leading - 32, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
// (srli (slli x, c3-c2), c3).
// Skip if we could use (zext.w (sraiw X, C2)).
- bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
+ bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
// Also Skip if we can use bexti.
- Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
+ Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1;
if (OneUseOrZExtW && !Skip) {
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, VT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, VT));
- SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Leading - C2, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(Leading, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -936,12 +824,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
// shifted by c2 bits with c3 leading zeros.
if (LeftShift && isShiftedMask_64(C1)) {
- uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
- if (C2 + C3 < XLen &&
- C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
+ if (C2 + Leading < XLen &&
+ C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
// Use slli.uw when possible.
- if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
+ if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
SDNode *SLLI_UW = CurDAG->getMachineNode(
RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SLLI_UW);
@@ -952,10 +840,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, VT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, VT));
- SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(C2 + Leading, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(Leading, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -965,9 +853,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
// shifted mask with c2 leading zeros and c3 trailing zeros.
if (!LeftShift && isShiftedMask_64(C1)) {
- uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
- uint64_t C3 = countTrailingZeros(C1);
- if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+ unsigned Trailing = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && !IsCANDI) {
unsigned SrliOpc = RISCV::SRLI;
// If the input is zexti32 we should use SRLIW.
if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
@@ -976,22 +864,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
X = X.getOperand(0);
}
SDNode *SRLI = CurDAG->getMachineNode(
- SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT));
+ SrliOpc, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
- if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
+ if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
OneUseOrZExtW && !IsCANDI) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, VT));
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -1000,25 +889,26 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
// shifted mask with no leading zeros and c3 trailing zeros.
if (LeftShift && isShiftedMask_64(C1)) {
- uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
- uint64_t C3 = countTrailingZeros(C1);
- if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+ unsigned Trailing = countTrailingZeros(C1);
+ if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT));
+ RISCV::SRLI, DL, VT, X,
+ CurDAG->getTargetConstant(Trailing - C2, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
- if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, VT));
+ if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(Trailing - C2, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
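
For illustration, the shifted-mask rewrites above trade an AND with a constant that is expensive to materialize for two shifts. Taking the first form with hypothetical values x in a0, c2 = 4 and c1 = 0x00ffffffffffffff (a mask with c3 = 8 leading zeros on RV64):

  (and (srl a0, 4), 0x00ffffffffffffff)
    -> (srli (slli a0, 4), 8)          # slli by c3-c2, srli by c3

i.e. slli t0, a0, 4 ; srli a1, t0, 8, avoiding a multi-instruction materialization of the 56-bit mask.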
@@ -1885,13 +1775,74 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
return false;
}
-bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
- // If this is FrameIndex, select it directly. Otherwise just let it get
- // selected to a register independently.
- if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
- else
- Base = Addr;
+// Fold constant addresses.
+static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
+ const MVT VT, const RISCVSubtarget *Subtarget,
+ SDValue Addr, SDValue &Base, SDValue &Offset) {
+ if (!isa<ConstantSDNode>(Addr))
+ return false;
+
+ int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
+
+ // If the constant is a simm12, we can fold the whole constant and use X0 as
+ // the base. If the constant can be materialized with LUI+simm12, use LUI as
+ // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
+ int64_t Lo12 = SignExtend64<12>(CVal);
+ int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
+ if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
+ if (Hi) {
+ int64_t Hi20 = (Hi >> 12) & 0xfffff;
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::LUI, DL, VT,
+ CurDAG->getTargetConstant(Hi20, DL, VT)),
+ 0);
+ } else {
+ Base = CurDAG->getRegister(RISCV::X0, VT);
+ }
+ Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
+ return true;
+ }
+
+ // Ask how constant materialization would handle this constant.
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
+
+ // If the last instruction would be an ADDI, we can fold its immediate and
+ // emit the rest of the sequence as the base.
+ if (Seq.back().Opc != RISCV::ADDI)
+ return false;
+ Lo12 = Seq.back().Imm;
+
+ // Drop the last instruction.
+ Seq.pop_back();
+ assert(!Seq.empty() && "Expected more instructions in sequence");
+
+ Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0);
+ Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
+ return true;
+}
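
As a worked example (values hypothetical), selectConstantAddr on CVal = 0x12345678 computes Lo12 = SignExtend64<12>(0x678) = 1656 and Hi = 0x12345000, which fits in 32 bits, so Hi20 = 0x12345 and the access becomes

  lui  a0, 0x12345
  lw   a1, 1656(a0)        # a0 + 1656 == 0x12345678

while a simm12 constant such as -2048 leaves Hi = 0 and folds entirely as lw a1, -2048(zero).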
+
+// Is this ADD instruction only used as the base pointer of scalar loads and
+// stores?
+static bool isWorthFoldingAdd(SDValue Add) {
+ for (auto Use : Add->uses()) {
+ if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
+ Use->getOpcode() != ISD::ATOMIC_LOAD &&
+ Use->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+ EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
+ if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
+ VT != MVT::f64)
+ return false;
+ // Don't allow stores of the value. It must be used as the address.
+ if (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getValue() == Add)
+ return false;
+ if (Use->getOpcode() == ISD::ATOMIC_STORE &&
+ cast<AtomicSDNode>(Use)->getVal() == Add)
+ return false;
+ }
+
return true;
}
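
A sketch of a case isWorthFoldingAdd rejects (hypothetical IR): if the add feeds anything besides load/store base pointers, the full constant must be materialized for that other use anyway, so folding would only add instructions:

  t = add a0, 0x12345678
  v = load t               ; could fold the lo12 bits here
  u = add t, a1            ; but t is still needed in full here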
@@ -1947,9 +1898,10 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
assert(!isInt<12>(CVal) && "simm12 not already handled?");
+ // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
+ // an ADDI for part of the offset and fold the rest into the load/store.
+ // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
- // We can use an ADDI for part of the offset and fold the rest into the
- // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
int64_t Adj = CVal < 0 ? -2048 : 2047;
Base = SDValue(
CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
@@ -1958,8 +1910,27 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
return true;
}
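
For example, with CVal = 3000 (in [2048, 4094]) this emits Adj = 2047 and folds the remaining 953 into the memory operand (register names hypothetical):

  addi t0, a0, 2047
  lw   a1, 953(t0)         # accesses a0 + 3000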
+
+ // For larger immediates, we might be able to save one instruction from
+ // constant materialization by folding the Lo12 bits of the immediate into
+ // the address. We should only do this if the ADD is only used by loads and
+ // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
+ // separately with the full materialized immediate creating extra
+ // instructions.
+ if (isWorthFoldingAdd(Addr) &&
+ selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
+ Offset)) {
+ // Insert an ADD instruction with the materialized Hi52 bits.
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
+ 0);
+ return true;
+ }
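
For a larger offset such as 0x12345678 (hypothetical), the fold saves the trailing ADDI of the materialization sequence: instead of lui+addiw+add+lw, selectConstantAddr supplies the LUI as Base, the inserted ADD applies the Hi bits, and the Lo12 bits ride on the load:

  lui  t0, 0x12345
  add  t0, a0, t0
  lw   a1, 1656(t0)        # accesses a0 + 0x12345678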
}
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
+ return true;
+
Base = Addr;
Offset = CurDAG->getTargetConstant(0, DL, VT);
return true;
@@ -2044,6 +2015,101 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
return false;
}
+/// Look for various patterns that can be done with a SHL that can be folded
+/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
+/// SHXADD we are trying to match.
+bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
+ SDValue &Val) {
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ SDValue N0 = N.getOperand(0);
+
+ bool LeftShift = N0.getOpcode() == ISD::SHL;
+ if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ uint64_t Mask = N.getConstantOperandVal(1);
+ unsigned C2 = N0.getConstantOperandVal(1);
+
+ unsigned XLen = Subtarget->getXLen();
+ if (LeftShift)
+ Mask &= maskTrailingZeros<uint64_t>(C2);
+ else
+ Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
+
+ // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+ // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
+ // followed by a SHXADD with c3 for the X amount.
+ if (isShiftedMask_64(Mask)) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+ unsigned Trailing = countTrailingZeros(Mask);
+ if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
+ 0);
+ return true;
+ }
+ // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
+ // leading zeros and c3 trailing zeros. We can use an SRLI by C3
+ // followed by a SHXADD using c3 for the X amount.
+ if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(
+ CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
+ 0);
+ return true;
+ }
+ }
+ }
+ }
+
+ bool LeftShift = N.getOpcode() == ISD::SHL;
+ if ((LeftShift || N.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N.getOperand(1))) {
+ SDValue N0 = N.getOperand(0);
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ uint64_t Mask = N0.getConstantOperandVal(1);
+ if (isShiftedMask_64(Mask)) {
+ unsigned C1 = N.getConstantOperandVal(1);
+ unsigned XLen = Subtarget->getXLen();
+ unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+ unsigned Trailing = countTrailingZeros(Mask);
+ // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
+ // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
+ if (LeftShift && Leading == 32 && Trailing > 0 &&
+ (Trailing + C1) == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Trailing, DL, VT)),
+ 0);
+ return true;
+ }
+ // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
+ // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
+ if (!LeftShift && Leading == 32 && Trailing > C1 &&
+ (Trailing - C1) == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Trailing, DL, VT)),
+ 0);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
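
A worked SH3ADD case (hypothetical values): matching (add (and (shl y, 1), 0xfffffffffffffff8), z), the masked value equals (y >> 2) << 3, so selectSHXADDOp with ShAmt = 3 yields Val = (srli y, 2) and the add selects to

  srli   t0, a0, 2
  sh3add a2, t0, a1        # ((a0 << 1) & ~7) + a1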
+
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
@@ -2271,102 +2337,6 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
return false;
}
-// Merge an ADDI into the offset of a load/store instruction where possible.
-// (load (addi base, off1), off2) -> (load base, off1+off2)
-// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
-// (load (add base, (addi src, off1)), off2)
-// -> (load (add base, src), off1+off2)
-// (store val, (add base, (addi src, off1)), off2)
-// -> (store val, (add base, src), off1+off2)
-// This is possible when off1+off2 fits a 12-bit immediate.
-bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
- unsigned OffsetOpIdx, BaseOpIdx;
- if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
- return false;
-
- if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
- return false;
-
- SDValue Base = N->getOperand(BaseOpIdx);
-
- if (!Base.isMachineOpcode())
- return false;
-
- if (Base.getMachineOpcode() == RISCV::ADDI) {
- // If the base is an ADDI, we can merge it in to the load/store.
- } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
- isa<ConstantSDNode>(Base.getOperand(1)) &&
- Base.getOperand(0).isMachineOpcode() &&
- Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
- isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
- // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
- // and LUI+ADDI would have produced the same result. This is true for all
- // simm32 values except 0x7ffff800-0x7fffffff.
- int64_t Offset =
- SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
- Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
- if (!isInt<32>(Offset))
- return false;
- } else
- return false;
-
- SDValue ImmOperand = Base.getOperand(1);
- uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
-
- if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
- int64_t Offset1 = Const->getSExtValue();
- int64_t CombinedOffset = Offset1 + Offset2;
- if (!isInt<12>(CombinedOffset))
- return false;
- ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
- ImmOperand.getValueType());
- } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
- // If the off1 in (addi base, off1) is a global variable's address (its
- // low part, really), then we can rely on the alignment of that variable
- // to provide a margin of safety before off1 can overflow the 12 bits.
- // Check if off2 falls within that margin; if so off1+off2 can't overflow.
- const DataLayout &DL = CurDAG->getDataLayout();
- Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL),
- GA->getOffset());
- if (Offset2 != 0 && Alignment <= Offset2)
- return false;
- int64_t Offset1 = GA->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetGlobalAddress(
- GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
- CombinedOffset, GA->getTargetFlags());
- } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
- // Ditto.
- Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
- if (Offset2 != 0 && Alignment <= Offset2)
- return false;
- int64_t Offset1 = CP->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetConstantPool(
- CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
- CombinedOffset, CP->getTargetFlags());
- } else {
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
- LLVM_DEBUG(Base->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\nN: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
-
- // Modify the offset operand of the load/store.
- if (BaseOpIdx == 0) { // Load
- N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
- N->getOperand(2));
- } else { // Store
- N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
- ImmOperand, N->getOperand(3));
- }
-
- return true;
-}
-
// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index b50927cfcca5..ef46204c00ac 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -47,7 +47,6 @@ public:
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectBaseAddr(SDValue Addr, SDValue &Base);
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
@@ -61,6 +60,17 @@ public:
bool selectSExti32(SDValue N, SDValue &Val);
bool selectZExti32(SDValue N, SDValue &Val);
+ bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val);
+ bool selectSH1ADDOp(SDValue N, SDValue &Val) {
+ return selectSHXADDOp(N, 1, Val);
+ }
+ bool selectSH2ADDOp(SDValue N, SDValue &Val) {
+ return selectSHXADDOp(N, 2, Val);
+ }
+ bool selectSH3ADDOp(SDValue N, SDValue &Val) {
+ return selectSHXADDOp(N, 3, Val);
+ }
+
bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const;
bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
@@ -118,7 +128,6 @@ public:
#include "RISCVGenDAGISel.inc"
private:
- bool doPeepholeLoadStoreADDI(SDNode *Node);
bool doPeepholeSExtW(SDNode *Node);
bool doPeepholeMaskedRVV(SDNode *Node);
};
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ff645dea4e7a..658865703079 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -526,6 +526,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT,
Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+
+ setOperationPromotedToType(
+ ISD::VECTOR_SPLICE, VT,
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
}
for (MVT VT : IntVecVTs) {
@@ -1157,6 +1161,37 @@ bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return C && C->getAPIntValue().ule(10);
}
+bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getIntegerBitWidth();
+ if (BitSize > Subtarget.getXLen())
+ return false;
+
+ // Fast path, assume 32-bit immediates are cheap.
+ int64_t Val = Imm.getSExtValue();
+ if (isInt<32>(Val))
+ return true;
+
+  // A constant pool entry may be more aligned than the load we're trying to
+ // replace. If we don't support unaligned scalar mem, prefer the constant
+ // pool.
+ // TODO: Can the caller pass down the alignment?
+ if (!Subtarget.enableUnalignedScalarMem())
+ return true;
+
+ // Prefer to keep the load if it would require many instructions.
+ // This uses the same threshold we use for constant pools but doesn't
+ // check useConstantPoolForLargeInts.
+ // TODO: Should we keep the load only when we're definitely going to emit a
+ // constant pool?
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
+ return Seq.size() <= Subtarget.getMaxBuildIntsCost();
+}
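
In rough terms: any simm32 value is folded immediately; without unaligned scalar memory support the load is always converted, since a backend-generated constant pool entry is naturally aligned while the original load may not be; otherwise the load is kept whenever RISCVMatInt would need more than getMaxBuildIntsCost() instructions, e.g. (hypothetically) a full 64-bit bit pattern requiring a long lui/slli/addi chain.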
+
bool RISCVTargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
@@ -1659,7 +1694,7 @@ static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
/// Return the type of the mask type suitable for masking the provided
/// vector type. This is simply an i1 element type vector of the same
/// (possibly scalable) length.
-static MVT getMaskTypeFor(EVT VecVT) {
+static MVT getMaskTypeFor(MVT VecVT) {
assert(VecVT.isVector());
ElementCount EC = VecVT.getVectorElementCount();
return MVT::getVectorVT(MVT::i1, EC);
@@ -5748,8 +5783,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1,
DownOffset, TrueMask, UpOffset);
return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset,
- TrueMask,
- DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
+ TrueMask, DAG.getRegister(RISCV::X0, XLenVT));
}
SDValue
@@ -8530,12 +8564,6 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
return Opcode;
}
-// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
-// FIXME: Should this be a generic combine? There's a similar combine on X86.
-//
-// Also try these folds where an add or sub is in the middle.
-// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
-// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
@@ -8543,12 +8571,40 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
- auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!ShAmtC || ShAmtC->getZExtValue() > 32)
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+ uint64_t ShAmt = N->getConstantOperandVal(1);
+ if (ShAmt > 32)
return SDValue();
SDValue N0 = N->getOperand(0);
+ // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
+ // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
+ // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
+ if (ShAmt < 32 &&
+ N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
+ N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
+ if (LShAmt < 32) {
+ SDLoc ShlDL(N0.getOperand(0));
+ SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
+ N0.getOperand(0).getOperand(0),
+ DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
+ DAG.getConstant(ShAmt + 32, DL, MVT::i64));
+ }
+ }
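
For example (values hypothetical), (sra (sext_inreg (shl X, 5), i32), 2) becomes (sra (shl X, 37), 34), which selects to

  slli a0, a0, 37
  srai a0, a0, 34

Both SLLI and SRAI have 16-bit compressed encodings (c.slli/c.srai); SLLIW and SRAIW do not, so the rewrite tends to shrink code size.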
+
+ // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
+ // FIXME: Should this be a generic combine? There's a similar combine on X86.
+ //
+ // Also try these folds where an add or sub is in the middle.
+ // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
+ // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
SDValue Shl;
ConstantSDNode *AddC = nullptr;
@@ -8594,12 +8650,12 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
DAG.getValueType(MVT::i32));
- if (ShAmtC->getZExtValue() == 32)
+ if (ShAmt == 32)
return SExt;
return DAG.getNode(
ISD::SHL, DL, MVT::i64, SExt,
- DAG.getConstant(32 - ShAmtC->getZExtValue(), DL, MVT::i64));
+ DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
@@ -9152,10 +9208,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// FIXME: Support FP.
if (Val.getOpcode() == RISCVISD::VMV_X_S) {
SDValue Src = Val.getOperand(0);
- EVT VecVT = Src.getValueType();
+ MVT VecVT = Src.getSimpleValueType();
EVT MemVT = Store->getMemoryVT();
// The memory VT and the element type must match.
- if (VecVT.getVectorElementType() == MemVT) {
+ if (MemVT == VecVT.getVectorElementType()) {
SDLoc DL(N);
MVT MaskVT = getMaskTypeFor(VecVT);
return DAG.getStoreVP(
@@ -9864,7 +9920,7 @@ EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
Register FLHS = First.getOperand(1).getReg();
Register FRHS = First.getOperand(2).getReg();
// Insert appropriate branch.
- BuildMI(ThisMBB, DL, TII.getBrCond(FirstCC))
+ BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
.addReg(FLHS)
.addReg(FRHS)
.addMBB(SinkMBB);
@@ -9876,7 +9932,7 @@ EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
// Insert appropriate branch.
- BuildMI(FirstMBB, DL, TII.getBrCond(SecondCC))
+ BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
.addReg(SLHS)
.addReg(SRHS)
.addMBB(SinkMBB);
@@ -9884,9 +9940,9 @@ EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
Register DestReg = Second.getOperand(0).getReg();
Register Op2Reg4 = Second.getOperand(4).getReg();
BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
- .addReg(Op1Reg4)
- .addMBB(ThisMBB)
.addReg(Op2Reg4)
+ .addMBB(ThisMBB)
+ .addReg(Op1Reg4)
.addMBB(FirstMBB)
.addReg(Op1Reg5)
.addMBB(SecondMBB);
@@ -12096,6 +12152,17 @@ const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}
+bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
+ // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
+ // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
+ // a power of two as well.
+ // FIXME: This doesn't work for zve32, but that's already broken
+ // elsewhere for the same reason.
+ assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
+  assert(RISCV::RVVBitsPerBlock == 64 &&
+         "RVVBitsPerBlock changed, audit needed");
+ return true;
+}
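
Numerically: VLEN = 128 gives vscale = 128/64 = 2, VLEN = 512 gives vscale = 8, and so on; since VLEN is constrained to a power of two no smaller than 64 and RVVBitsPerBlock is 64, the quotient is always a power of two.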
+
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index eb013d4b6682..5e15176de59c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -520,9 +520,7 @@ public:
SmallVectorImpl<SDValue> &InVals) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
- Type *Ty) const override {
- return true;
- }
+ Type *Ty) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool shouldConsiderGEPOffsetSplit() const override { return true; }
@@ -599,6 +597,8 @@ public:
unsigned uid,
MCContext &Ctx) const override;
+ bool isVScaleKnownToBeAPowerOfTwo() const override;
+
private:
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index ee4c026af8f4..06a90438838e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -384,7 +384,6 @@ def uimm6gt32 : ImmLeaf<XLenVT, [{
// Necessary because a frameindex can't be matched directly in a pattern.
def FrameAddrRegImm : ComplexPattern<iPTR, 2, "SelectFrameAddrRegImm",
[frameindex, or, add]>;
-def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm">;
// Return the negation of an immediate value.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index f8bc241039f8..1ad634344c09 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -115,6 +115,35 @@ class VSXSched<int n, string o> :
class VLFSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDFF" # n),
ReadVLDX, ReadVMask]>;
+// Unit-Stride Segment Loads and Stores
+class VLSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVLSEG" #nf #"e" #eew), ReadVLDX, ReadVMask]>;
+class VSSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVSSEG" #nf #"e" #eew),
+ !cast<SchedReadWrite>("ReadVSTE" #eew #"V"), ReadVSTX, ReadVMask]>;
+class VLSEGFFSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVLSEGFF" #nf #"e" #eew), ReadVLDX, ReadVMask]>;
+// Strided Segment Loads and Stores
+class VLSSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVLSSEG" #nf #"e" #eew), ReadVLDX, ReadVLDSX,
+ ReadVMask]>;
+class VSSSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVSSSEG" #nf #"e" #eew),
+ !cast<SchedReadWrite>("ReadVSTS" #eew #"V"), ReadVSTX, ReadVSTSX, ReadVMask]>;
+// Indexed Segment Loads and Stores
+class VLUXSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVLUXSEG" #nf #"e" #eew), ReadVLDX, ReadVLDUXV,
+ ReadVMask]>;
+class VLOXSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVLOXSEG" #nf #"e" #eew), ReadVLDX, ReadVLDOXV,
+ ReadVMask]>;
+class VSUXSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVSUXSEG" #nf #"e" #eew),
+ !cast<SchedReadWrite>("ReadVSTUX" #eew), ReadVSTX, ReadVSTUXV, ReadVMask]>;
+class VSOXSEGSched<int nf, int eew> : Sched<[
+ !cast<SchedReadWrite>("WriteVSOXSEG" #nf #"e" #eew),
+ !cast<SchedReadWrite>("ReadVSTOX" #eew), ReadVSTX, ReadVSTOXV, ReadVMask]>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -1476,14 +1505,9 @@ defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
-def VMV1R_V : RVInstV<0b100111, 0, OPIVI, (outs VR:$vd), (ins VR:$vs2),
- "vmv1r.v", "$vd, $vs2">, VMVRSched<1> {
- let Uses = [];
- let vm = 1;
-}
// A future extension may relax the vector register alignment restrictions.
-foreach n = [2, 4, 8] in {
- defvar vrc = !cast<VReg>("VRM"#n);
+foreach n = [1, 2, 4, 8] in {
+ defvar vrc = !cast<VReg>(!if(!eq(n, 1), "VR", "VRM"#n));
def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs vrc:$vd),
(ins vrc:$vs2), "vmv" # n # "r.v", "$vd, $vs2">,
VMVRSched<n> {
@@ -1500,31 +1524,35 @@ let Predicates = [HasVInstructions] in {
defvar w = !cast<RISCVWidth>("LSWidth"#eew);
def VLSEG#nf#E#eew#_V :
- VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">;
+ VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">,
+ VLSEGSched<nf, eew>;
def VLSEG#nf#E#eew#FF_V :
- VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">;
+ VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">,
+ VLSEGFFSched<nf, eew>;
def VSSEG#nf#E#eew#_V :
- VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">;
-
+ VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">,
+ VSSEGSched<nf, eew>;
// Vector Strided Instructions
def VLSSEG#nf#E#eew#_V :
- VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">;
+ VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">,
+ VLSSEGSched<nf, eew>;
def VSSSEG#nf#E#eew#_V :
- VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">;
+ VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">,
+ VSSSEGSched<nf, eew>;
// Vector Indexed Instructions
def VLUXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w,
- "vluxseg"#nf#"ei"#eew#".v">;
+ "vluxseg"#nf#"ei"#eew#".v">, VLUXSEGSched<nf, eew>;
def VLOXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w,
- "vloxseg"#nf#"ei"#eew#".v">;
+ "vloxseg"#nf#"ei"#eew#".v">, VLOXSEGSched<nf, eew>;
def VSUXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w,
- "vsuxseg"#nf#"ei"#eew#".v">;
+ "vsuxseg"#nf#"ei"#eew#".v">, VSUXSEGSched<nf, eew>;
def VSOXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w,
- "vsoxseg"#nf#"ei"#eew#".v">;
+ "vsoxseg"#nf#"ei"#eew#".v">, VSOXSEGSched<nf, eew>;
}
}
} // Predicates = [HasVInstructions]
@@ -1533,17 +1561,22 @@ let Predicates = [HasVInstructionsI64] in {
foreach nf=2-8 in {
// Vector Unit-strided Segment Instructions
def VLSEG#nf#E64_V :
- VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">;
+ VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">,
+ VLSEGSched<nf, 64>;
def VLSEG#nf#E64FF_V :
- VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">;
+ VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">,
+ VLSEGFFSched<nf, 64>;
def VSSEG#nf#E64_V :
- VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
+ VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">,
+ VSSEGSched<nf, 64>;
// Vector Strided Segment Instructions
def VLSSEG#nf#E64_V :
- VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
+ VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">,
+ VLSSEGSched<nf, 64>;
def VSSSEG#nf#E64_V :
- VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
+ VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">,
+ VSSSEGSched<nf, 64>;
}
} // Predicates = [HasVInstructionsI64]
let Predicates = [HasVInstructionsI64, IsRV64] in {
@@ -1551,16 +1584,16 @@ let Predicates = [HasVInstructionsI64, IsRV64] in {
// Vector Indexed Segment Instructions
def VLUXSEG#nf#EI64_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64,
- "vluxseg"#nf#"ei64.v">;
+ "vluxseg"#nf#"ei64.v">, VLUXSEGSched<nf, 64>;
def VLOXSEG#nf#EI64_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth64,
- "vloxseg"#nf#"ei64.v">;
+ "vloxseg"#nf#"ei64.v">, VLOXSEGSched<nf, 64>;
def VSUXSEG#nf#EI64_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64,
- "vsuxseg"#nf#"ei64.v">;
+ "vsuxseg"#nf#"ei64.v">, VSUXSEGSched<nf, 64>;
def VSOXSEG#nf#EI64_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64,
- "vsoxseg"#nf#"ei64.v">;
+ "vsoxseg"#nf#"ei64.v">, VSOXSEGSched<nf, 64>;
}
} // Predicates = [HasVInstructionsI64, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 06d4c4d0a9e6..b7b25643e397 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -34,11 +34,11 @@ multiclass VPatUSLoadStoreSDNode<ValueType type,
defvar load_instr = !cast<Instruction>("PseudoVLE"#sew#"_V_"#vlmul.MX);
defvar store_instr = !cast<Instruction>("PseudoVSE"#sew#"_V_"#vlmul.MX);
// Load
- def : Pat<(type (load BaseAddr:$rs1)),
- (load_instr BaseAddr:$rs1, avl, log2sew)>;
+ def : Pat<(type (load GPR:$rs1)),
+ (load_instr GPR:$rs1, avl, log2sew)>;
// Store
- def : Pat<(store type:$rs2, BaseAddr:$rs1),
- (store_instr reg_class:$rs2, BaseAddr:$rs1, avl, log2sew)>;
+ def : Pat<(store type:$rs2, GPR:$rs1),
+ (store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>;
}
multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
@@ -53,11 +53,11 @@ multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
!cast<Instruction>("VS"#!substr(vlmul.MX, 1)#"R_V");
// Load
- def : Pat<(type (load BaseAddr:$rs1)),
- (load_instr BaseAddr:$rs1)>;
+ def : Pat<(type (load GPR:$rs1)),
+ (load_instr GPR:$rs1)>;
// Store
- def : Pat<(store type:$rs2, BaseAddr:$rs1),
- (store_instr reg_class:$rs2, BaseAddr:$rs1)>;
+ def : Pat<(store type:$rs2, GPR:$rs1),
+ (store_instr reg_class:$rs2, GPR:$rs1)>;
}
multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m>
@@ -65,11 +65,11 @@ multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m>
defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#m.BX);
defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#m.BX);
// Load
- def : Pat<(m.Mask (load BaseAddr:$rs1)),
- (load_instr BaseAddr:$rs1, m.AVL, m.Log2SEW)>;
+ def : Pat<(m.Mask (load GPR:$rs1)),
+ (load_instr GPR:$rs1, m.AVL, m.Log2SEW)>;
// Store
- def : Pat<(store m.Mask:$rs2, BaseAddr:$rs1),
- (store_instr VR:$rs2, BaseAddr:$rs1, m.AVL, m.Log2SEW)>;
+ def : Pat<(store m.Mask:$rs2, GPR:$rs1),
+ (store_instr VR:$rs2, GPR:$rs1, m.AVL, m.Log2SEW)>;
}
class VPatBinarySDNode_VV<SDNode vop,
@@ -1038,10 +1038,14 @@ let Predicates = [HasVInstructionsAnyF] in
foreach vti = AllFloatVectors in {
// Fold store of vmv.f.s to a vse with VL=1.
defvar store_instr = !cast<Instruction>("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX);
- def : Pat<(store (vti.Scalar (int_riscv_vfmv_f_s (vti.Vector vti.RegClass:$rs2))), BaseAddr:$rs1),
- (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>;
- def : Pat<(store (extractelt (vti.Vector vti.RegClass:$rs2), 0), BaseAddr:$rs1),
- (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>;
+
+ let AddedComplexity = 2 in {
+ // Add complexity to increase the priority of this pattern being matched.
+ def : Pat<(store (vti.Scalar (int_riscv_vfmv_f_s (vti.Vector vti.RegClass:$rs2))), GPR:$rs1),
+ (store_instr vti.RegClass:$rs2, GPR:$rs1, 1, vti.Log2SEW)>;
+ def : Pat<(store (extractelt (vti.Vector vti.RegClass:$rs2), 0), GPR:$rs1),
+ (store_instr vti.RegClass:$rs2, GPR:$rs1, 1, vti.Log2SEW)>;
+ }
defvar vmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
vti.ScalarSuffix,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 081f61617d59..49306bb0f4e2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -76,13 +76,13 @@ def riscv_urem_vl : SDNode<"RISCVISD::UREM_VL", SDT_RISCVIntBinOp_VL>;
def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>;
def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>;
def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>;
-def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL>;
-def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL>;
-def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL>;
-def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
-def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL>;
-def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
def riscv_usubsat_vl : SDNode<"RISCVISD::USUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -94,8 +94,8 @@ def riscv_fneg_vl : SDNode<"RISCVISD::FNEG_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fabs_vl : SDNode<"RISCVISD::FABS_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fsqrt_vl : SDNode<"RISCVISD::FSQRT_VL", SDT_RISCVFPUnOp_VL>;
def riscv_fcopysign_vl : SDNode<"RISCVISD::FCOPYSIGN_VL", SDT_RISCVFPBinOp_VL>;
-def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL>;
-def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL>;
+def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
+def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 9532d1dd3dd2..02ae4f88d56a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -83,13 +83,13 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
def BCLRXForm : SDNodeXForm<imm, [{
// Find the lowest 0.
- return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
+ return CurDAG->getTargetConstant(countTrailingOnes(N->getZExtValue()),
SDLoc(N), N->getValueType(0));
}]>;
def BSETINVXForm : SDNodeXForm<imm, [{
// Find the lowest 1.
- return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+ return CurDAG->getTargetConstant(countTrailingZeros(N->getZExtValue()),
SDLoc(N), N->getValueType(0));
}]>;
@@ -239,6 +239,10 @@ def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
return !C || !isInt<12>(C->getSExtValue());
}]>;
+def sh1add_op : ComplexPattern<XLenVT, 1, "selectSH1ADDOp", [], [], 6>;
+def sh2add_op : ComplexPattern<XLenVT, 1, "selectSH2ADDOp", [], [], 6>;
+def sh3add_op : ComplexPattern<XLenVT, 1, "selectSH3ADDOp", [], [], 6>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -1095,6 +1099,14 @@ def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), non_imm12:$rs2),
def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), non_imm12:$rs2),
(SH3ADD GPR:$rs1, GPR:$rs2)>;
+// More complex cases use a ComplexPattern.
+def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
+ (SH1ADD sh1add_op:$rs1, GPR:$rs2)>;
+def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
+ (SH2ADD sh2add_op:$rs1, GPR:$rs2)>;
+def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
+ (SH3ADD sh3add_op:$rs1, GPR:$rs2)>;
+
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
(SH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
@@ -1190,18 +1202,6 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
(SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>;
def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
(SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
-
-// Use SRLIW to shift out the LSBs and zero the upper 32-bits. Use SHXADD to
-// shift zeros into the LSBs the addition shl amount.
-def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFE), (i64 1)),
- non_imm12:$rs2)),
- (SH2ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
-def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFE), (i64 2)),
- non_imm12:$rs2)),
- (SH3ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
-def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFC), (i64 1)),
- non_imm12:$rs2)),
- (SH3ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
} // Predicates = [HasStdExtZba, IsRV64]
let Predicates = [HasStdExtZbcOrZbkc] in {
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 1fc424411c12..dad0aa476471 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -293,8 +293,16 @@ static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
"Unsupported instruction for this optimization.");
+ int SkipN = 0;
+
+ // Skip the first (value) operand to a store instruction (except if the store
+ // offset is zero) in order to avoid an incorrect transformation.
+ // e.g. sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2)
+ if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
+ SkipN = 1;
+
// Update registers
- for (MachineOperand &MO : MI.operands())
+ for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
// Do not update operands that define the old register.
//
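
The guarded rewrite then looks like this (hypothetical registers): sd a0, 808(a0) becomes

  addi a2, a0, 768
  sd   a0, 40(a2)          # value operand a0 left untouched

whereas also rewriting the value operand would store a0 + 768 instead of a0.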
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 43af1802d706..bafcf47b82e4 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -53,6 +53,20 @@ def WriteVLDFF8 : SchedWrite;
def WriteVLDFF16 : SchedWrite;
def WriteVLDFF32 : SchedWrite;
def WriteVLDFF64 : SchedWrite;
+// 7.8. Vector Segment Instructions
+foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ def WriteVLSEG # nf # e # eew : SchedWrite;
+ def WriteVSSEG # nf # e # eew : SchedWrite;
+ def WriteVLSEGFF # nf # e # eew : SchedWrite;
+ def WriteVLSSEG # nf # e # eew : SchedWrite;
+ def WriteVSSSEG # nf # e # eew : SchedWrite;
+ def WriteVLUXSEG # nf # e # eew : SchedWrite;
+ def WriteVLOXSEG # nf # e # eew : SchedWrite;
+ def WriteVSUXSEG # nf # e # eew : SchedWrite;
+ def WriteVSOXSEG # nf # e # eew : SchedWrite;
+ }
+}
// 7.9. Vector Whole Register Instructions
def WriteVLD1R8 : SchedWrite;
def WriteVLD1R16 : SchedWrite;
@@ -538,6 +552,20 @@ def : WriteRes<WriteVST1R, []>;
def : WriteRes<WriteVST2R, []>;
def : WriteRes<WriteVST4R, []>;
def : WriteRes<WriteVST8R, []>;
+// Vector Segment Loads and Stores
+foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ def : WriteRes <!cast<SchedWrite>("WriteVLSEG" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVLSEGFF" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVSSEG" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVLSSEG" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVSSSEG" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVLUXSEG" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVLOXSEG" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVSUXSEG" # nf # "e" # eew), []>;
+ def : WriteRes <!cast<SchedWrite>("WriteVSOXSEG" # nf # "e" # eew), []>;
+ }
+}
// 12. Vector Integer Arithmetic Instructions
def : WriteRes<WriteVIALUV, []>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7caf0fedb2ca..96c46fb7554f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -57,6 +57,10 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const;
bool supportsScalableVectors() const { return ST->hasVInstructions(); }
+ PredicationStyle emitGetActiveLaneMask() const {
+ return ST->hasVInstructions() ? PredicationStyle::Data
+ : PredicationStyle::None;
+ }
Optional<unsigned> getMaxVScale() const;
Optional<unsigned> getVScaleForTuning() const;
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
index d953bc590473..f726f42c9bcb 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
@@ -46,12 +46,6 @@ public:
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
@@ -110,9 +104,6 @@ static void emitUntypedInstrOperands(const MCInst &MI, EndianWriter &OSE) {
void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- auto Features = computeAvailableFeatures(STI.getFeatureBits());
- verifyInstructionPredicates(MI, Features);
-
EndianWriter OSE(OS, support::little);
// Encode the first 32 SPIR-V bytes with the number of args and the opcode.
@@ -128,5 +119,4 @@ void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
emitUntypedInstrOperands(MI, OSE);
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SPIRVGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
index 6b8b4a73af92..62ce15550ae7 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SPIRVGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h
index 4009fa96aa68..abc8df34be0a 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h
@@ -44,6 +44,7 @@ std::unique_ptr<MCObjectTargetWriter> createSPIRVObjectTargetWriter();
// Defines symbolic names for the SPIR-V instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "SPIRVGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index 0de232651377..605bf949187f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -215,6 +215,9 @@ void SPIRVAsmPrinter::outputInstruction(const MachineInstr *MI) {
}
void SPIRVAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ SPIRV_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
if (!MAI->getSkipEmission(MI))
outputInstruction(MI);
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index df07a126eeea..5b6b82aebf30 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -68,6 +68,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
ArrayRef<ArrayRef<Register>> VRegs,
FunctionLoweringInfo &FLI) const {
assert(GR && "Must initialize the SPIRV type registry before lowering args.");
+ GR->setCurrentFunc(MIRBuilder.getMF());
// Assign types and names to all args, and store their types for later.
SmallVector<Register, 4> ArgTypeVRegs;
@@ -114,6 +115,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
auto MRI = MIRBuilder.getMRI();
Register FuncVReg = MRI->createGenericVirtualRegister(LLT::scalar(32));
MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass);
+ if (F.isDeclaration())
+ GR->add(&F, &MIRBuilder.getMF(), FuncVReg);
auto *FTy = F.getFunctionType();
auto FuncTy = GR->assignTypeToVReg(FTy, FuncVReg, MIRBuilder);
@@ -136,6 +139,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildInstr(SPIRV::OpFunctionParameter)
.addDef(VRegs[i][0])
.addUse(ArgTypeVRegs[i]);
+ if (F.isDeclaration())
+ GR->add(F.getArg(i), &MIRBuilder.getMF(), VRegs[i][0]);
}
// Name the function.
if (F.hasName())
@@ -165,6 +170,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (Info.OrigRet.Regs.size() > 1)
return false;
+ GR->setCurrentFunc(MIRBuilder.getMF());
Register ResVReg =
Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0];
// Emit a regular OpFunctionCall. If it's an externally declared function,
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
new file mode 100644
index 000000000000..57cd4bafd351
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
@@ -0,0 +1,95 @@
+//===-- SPIRVDuplicatesTracker.cpp - SPIR-V Duplicates Tracker --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// General infrastructure for keeping track of the values that, according to
+// the SPIR-V binary layout, should be global to the whole module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVDuplicatesTracker.h"
+
+using namespace llvm;
+
+template <typename T>
+void SPIRVGeneralDuplicatesTracker::prebuildReg2Entry(
+ SPIRVDuplicatesTracker<T> &DT, SPIRVReg2EntryTy &Reg2Entry) {
+ for (auto &TPair : DT.getAllUses()) {
+ for (auto &RegPair : TPair.second) {
+ const MachineFunction *MF = RegPair.first;
+ Register R = RegPair.second;
+ MachineInstr *MI = MF->getRegInfo().getUniqueVRegDef(R);
+ if (!MI)
+ continue;
+ Reg2Entry[&MI->getOperand(0)] = &TPair.second;
+ }
+ }
+}
+
+void SPIRVGeneralDuplicatesTracker::buildDepsGraph(
+ std::vector<SPIRV::DTSortableEntry *> &Graph,
+ MachineModuleInfo *MMI = nullptr) {
+ SPIRVReg2EntryTy Reg2Entry;
+ prebuildReg2Entry(TT, Reg2Entry);
+ prebuildReg2Entry(CT, Reg2Entry);
+ prebuildReg2Entry(GT, Reg2Entry);
+ prebuildReg2Entry(FT, Reg2Entry);
+ prebuildReg2Entry(AT, Reg2Entry);
+
+ for (auto &Op2E : Reg2Entry) {
+ SPIRV::DTSortableEntry *E = Op2E.second;
+ Graph.push_back(E);
+ for (auto &U : *E) {
+ const MachineRegisterInfo &MRI = U.first->getRegInfo();
+ MachineInstr *MI = MRI.getUniqueVRegDef(U.second);
+ if (!MI)
+ continue;
+ assert(MI && MI->getParent() && "No MachineInstr created yet");
+ for (auto i = MI->getNumDefs(); i < MI->getNumOperands(); i++) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg())
+ continue;
+ MachineOperand *RegOp = &MRI.getVRegDef(Op.getReg())->getOperand(0);
+ assert((MI->getOpcode() == SPIRV::OpVariable && i == 3) ||
+ Reg2Entry.count(RegOp));
+ if (Reg2Entry.count(RegOp))
+ E->addDep(Reg2Entry[RegOp]);
+ }
+
+ if (E->getIsFunc()) {
+ MachineInstr *Next = MI->getNextNode();
+ if (Next && (Next->getOpcode() == SPIRV::OpFunction ||
+ Next->getOpcode() == SPIRV::OpFunctionParameter)) {
+ E->addDep(Reg2Entry[&Next->getOperand(0)]);
+ }
+ }
+ }
+ }
+
+ if (MMI) {
+ const Module *M = MMI->getModule();
+ for (auto F = M->begin(), E = M->end(); F != E; ++F) {
+ const MachineFunction *MF = MMI->getMachineFunction(*F);
+ if (!MF)
+ continue;
+ for (const MachineBasicBlock &MBB : *MF) {
+ for (const MachineInstr &CMI : MBB) {
+ MachineInstr &MI = const_cast<MachineInstr &>(CMI);
+ MI.dump();
+ if (MI.getNumExplicitDefs() > 0 &&
+ Reg2Entry.count(&MI.getOperand(0))) {
+ dbgs() << "\t[";
+ for (SPIRV::DTSortableEntry *D :
+ Reg2Entry.lookup(&MI.getOperand(0))->getDeps())
+ dbgs() << Register::virtReg2Index(D->lookup(MF)) << ", ";
+ dbgs() << "]\n";
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
new file mode 100644
index 000000000000..58ae1f86ce42
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
@@ -0,0 +1,174 @@
+//===-- SPIRVDuplicatesTracker.h - SPIR-V Duplicates Tracker ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// General infrastructure for keeping track of the values that, according to
+// the SPIR-V binary layout, should be global to the whole module.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVDUPLICATESTRACKER_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVDUPLICATESTRACKER_H
+
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include <type_traits>
+
+namespace llvm {
+namespace SPIRV {
+// NOTE: using MapVector instead of DenseMap because it keeps everything
+// ordered in a stable manner, at the price of extra (NumKeys)*PtrSize memory
+// and expensive removals, which do not happen anyway.
+class DTSortableEntry : public MapVector<const MachineFunction *, Register> {
+ SmallVector<DTSortableEntry *, 2> Deps;
+
+ struct FlagsTy {
+ unsigned IsFunc : 1;
+ unsigned IsGV : 1;
+ // NOTE: bit-field default init is a C++20 feature.
+ FlagsTy() : IsFunc(0), IsGV(0) {}
+ };
+ FlagsTy Flags;
+
+public:
+ // The common hoisting utility doesn't support functions, because hoisting
+ // them requires hoisting their params as well.
+ bool getIsFunc() const { return Flags.IsFunc; }
+ bool getIsGV() const { return Flags.IsGV; }
+ void setIsFunc(bool V) { Flags.IsFunc = V; }
+ void setIsGV(bool V) { Flags.IsGV = V; }
+
+ const SmallVector<DTSortableEntry *, 2> &getDeps() const { return Deps; }
+ void addDep(DTSortableEntry *E) { Deps.push_back(E); }
+};
+} // namespace SPIRV
+
+template <typename KeyTy> class SPIRVDuplicatesTrackerBase {
+public:
+ // NOTE: using MapVector instead of DenseMap keeps everything ordered in a
+ // stable manner, at the price of extra (NumKeys)*PtrSize memory and
+ // expensive removals, which don't happen anyway.
+ using StorageTy = MapVector<KeyTy, SPIRV::DTSortableEntry>;
+
+private:
+ StorageTy Storage;
+
+public:
+ void add(KeyTy V, const MachineFunction *MF, Register R) {
+ if (find(V, MF).isValid())
+ return;
+
+ Storage[V][MF] = R;
+ if (std::is_same<Function,
+ typename std::remove_const<
+ typename std::remove_pointer<KeyTy>::type>::type>() ||
+ std::is_same<Argument,
+ typename std::remove_const<
+ typename std::remove_pointer<KeyTy>::type>::type>())
+ Storage[V].setIsFunc(true);
+ if (std::is_same<GlobalVariable,
+ typename std::remove_const<
+ typename std::remove_pointer<KeyTy>::type>::type>())
+ Storage[V].setIsGV(true);
+ }
+
+ Register find(KeyTy V, const MachineFunction *MF) const {
+ auto iter = Storage.find(V);
+ if (iter != Storage.end()) {
+ auto Map = iter->second;
+ auto iter2 = Map.find(MF);
+ if (iter2 != Map.end())
+ return iter2->second;
+ }
+ return Register();
+ }
+
+ const StorageTy &getAllUses() const { return Storage; }
+
+private:
+ StorageTy &getAllUses() { return Storage; }
+
+ // The friend class needs access to the internal storage to be able to
+ // build the dependency graph; we can't declare only one function a
+ // 'friend' due to the incomplete declaration at this point and mutual
+ // dependency problems.
+ friend class SPIRVGeneralDuplicatesTracker;
+};
+
+template <typename T>
+class SPIRVDuplicatesTracker : public SPIRVDuplicatesTrackerBase<const T *> {};
+
+class SPIRVGeneralDuplicatesTracker {
+ SPIRVDuplicatesTracker<Type> TT;
+ SPIRVDuplicatesTracker<Constant> CT;
+ SPIRVDuplicatesTracker<GlobalVariable> GT;
+ SPIRVDuplicatesTracker<Function> FT;
+ SPIRVDuplicatesTracker<Argument> AT;
+
+ // NOTE: using MOs instead of regs to get rid of the MF dependency and be
+ // able to use a flat data structure.
+ // NOTE: replacing DenseMap with MapVector doesn't affect overall
+ // correctness, but it makes LIT tests more stable; we should still prefer
+ // DenseMap due to the significant perf difference.
+ using SPIRVReg2EntryTy =
+ MapVector<MachineOperand *, SPIRV::DTSortableEntry *>;
+
+ template <typename T>
+ void prebuildReg2Entry(SPIRVDuplicatesTracker<T> &DT,
+ SPIRVReg2EntryTy &Reg2Entry);
+
+public:
+ void buildDepsGraph(std::vector<SPIRV::DTSortableEntry *> &Graph,
+ MachineModuleInfo *MMI);
+
+ void add(const Type *T, const MachineFunction *MF, Register R) {
+ TT.add(T, MF, R);
+ }
+
+ void add(const Constant *C, const MachineFunction *MF, Register R) {
+ CT.add(C, MF, R);
+ }
+
+ void add(const GlobalVariable *GV, const MachineFunction *MF, Register R) {
+ GT.add(GV, MF, R);
+ }
+
+ void add(const Function *F, const MachineFunction *MF, Register R) {
+ FT.add(F, MF, R);
+ }
+
+ void add(const Argument *Arg, const MachineFunction *MF, Register R) {
+ AT.add(Arg, MF, R);
+ }
+
+ Register find(const Type *T, const MachineFunction *MF) {
+ return TT.find(const_cast<Type *>(T), MF);
+ }
+
+ Register find(const Constant *C, const MachineFunction *MF) {
+ return CT.find(const_cast<Constant *>(C), MF);
+ }
+
+ Register find(const GlobalVariable *GV, const MachineFunction *MF) {
+ return GT.find(const_cast<GlobalVariable *>(GV), MF);
+ }
+
+ Register find(const Function *F, const MachineFunction *MF) {
+ return FT.find(const_cast<Function *>(F), MF);
+ }
+
+ Register find(const Argument *Arg, const MachineFunction *MF) {
+ return AT.find(const_cast<Argument *>(Arg), MF);
+ }
+};
+} // namespace llvm
+#endif
\ No newline at end of file
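A minimal usage sketch of the tracker above; T, MF1/MF2, and R1-R3 are placeholders. The point is that entries are keyed per (value, machine function) pair and a repeated add for the same pair is a no-op:

    #include "SPIRVDuplicatesTracker.h"
    #include <cassert>

    static void trackerUsageSketch(const llvm::Type *T,
                                   const llvm::MachineFunction *MF1,
                                   const llvm::MachineFunction *MF2,
                                   llvm::Register R1, llvm::Register R2,
                                   llvm::Register R3) {
      llvm::SPIRVGeneralDuplicatesTracker DT;
      DT.add(T, MF1, R1); // first registration for (T, MF1)
      DT.add(T, MF2, R2); // same key, different MF: a separate entry
      assert(DT.find(T, MF1) == R1 && DT.find(T, MF2) == R2);
      DT.add(T, MF1, R3); // no-op: (T, MF1) already maps to R1
      assert(DT.find(T, MF1) == R1);
    }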
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 02a6905a1abc..5f890c003cbc 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -101,7 +101,6 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val,
SPIRVType *SpvType,
bool EmitIR) {
auto &MF = MIRBuilder.getMF();
- Register Res;
const IntegerType *LLVMIntTy;
if (SpvType)
LLVMIntTy = cast<IntegerType>(getTypeForSPIRVType(SpvType));
@@ -110,15 +109,18 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val,
// Find a constant in DT or build a new one.
const auto ConstInt =
ConstantInt::get(const_cast<IntegerType *>(LLVMIntTy), Val);
- unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
- Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
- assignTypeToVReg(LLVMIntTy, Res, MIRBuilder);
- if (EmitIR)
- MIRBuilder.buildConstant(Res, *ConstInt);
- else
- MIRBuilder.buildInstr(SPIRV::OpConstantI)
- .addDef(Res)
- .addImm(ConstInt->getSExtValue());
+ Register Res = DT.find(ConstInt, &MF);
+ if (!Res.isValid()) {
+ unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
+ Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+ assignTypeToVReg(LLVMIntTy, Res, MIRBuilder);
+ if (EmitIR)
+ MIRBuilder.buildConstant(Res, *ConstInt);
+ else
+ MIRBuilder.buildInstr(SPIRV::OpConstantI)
+ .addDef(Res)
+ .addImm(ConstInt->getSExtValue());
+ }
return Res;
}
@@ -126,7 +128,6 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType) {
auto &MF = MIRBuilder.getMF();
- Register Res;
const Type *LLVMFPTy;
if (SpvType) {
LLVMFPTy = getTypeForSPIRVType(SpvType);
@@ -136,10 +137,13 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
}
// Find a constant in DT or build a new one.
const auto ConstFP = ConstantFP::get(LLVMFPTy->getContext(), Val);
- unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
- Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
- assignTypeToVReg(LLVMFPTy, Res, MIRBuilder);
- MIRBuilder.buildFConstant(Res, *ConstFP);
+ Register Res = DT.find(ConstFP, &MF);
+ if (!Res.isValid()) {
+ unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
+ Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+ assignTypeToVReg(LLVMFPTy, Res, MIRBuilder);
+ MIRBuilder.buildFConstant(Res, *ConstFP);
+ }
return Res;
}
@@ -184,6 +188,7 @@ Register SPIRVGlobalRegistry::buildGlobalVariable(
*Subtarget.getRegBankInfo());
}
Reg = MIB->getOperand(0).getReg();
+ DT.add(GVar, &MIRBuilder.getMF(), Reg);
// Set to Reg the same type as ResVReg has.
auto MRI = MIRBuilder.getMRI();
@@ -318,10 +323,11 @@ SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const {
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
const Type *Type, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier AccessQual, bool EmitIR) {
+ Register Reg = DT.find(Type, &MIRBuilder.getMF());
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
SPIRVType *SpirvType = createSPIRVType(Type, MIRBuilder, AccessQual, EmitIR);
- VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType;
- SPIRVToLLVMType[SpirvType] = Type;
- return SpirvType;
+ return restOfCreateSPIRVType(Type, SpirvType);
}
bool SPIRVGlobalRegistry::isScalarOfType(Register VReg,
@@ -387,17 +393,21 @@ SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth,
MIRBuilder);
}
-SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(Type *LLVMTy,
- MachineInstrBuilder MIB) {
- SPIRVType *SpirvType = MIB;
+SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(const Type *LLVMTy,
+ SPIRVType *SpirvType) {
+ assert(CurMF == SpirvType->getMF());
VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType;
SPIRVToLLVMType[SpirvType] = LLVMTy;
+ DT.add(LLVMTy, CurMF, getSPIRVTypeID(SpirvType));
return SpirvType;
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(
unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) {
Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth);
+ Register Reg = DT.find(LLVMTy, CurMF);
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
MachineBasicBlock &BB = *I.getParent();
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeInt))
.addDef(createTypeVReg(CurMF->getRegInfo()))
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 952ab4c13e29..13dcc20a3e0a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRVDuplicatesTracker.h"
#include "SPIRVInstrInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -30,7 +31,10 @@ class SPIRVGlobalRegistry {
// where Reg = OpType...
// while VRegToTypeMap tracks SPIR-V type assigned to other regs (i.e. not
// type-declaring ones)
- DenseMap<MachineFunction *, DenseMap<Register, SPIRVType *>> VRegToTypeMap;
+ DenseMap<const MachineFunction *, DenseMap<Register, SPIRVType *>>
+ VRegToTypeMap;
+
+ SPIRVGeneralDuplicatesTracker DT;
DenseMap<SPIRVType *, const Type *> SPIRVToLLVMType;
@@ -48,6 +52,39 @@ public:
MachineFunction *CurMF;
+ void add(const Constant *C, MachineFunction *MF, Register R) {
+ DT.add(C, MF, R);
+ }
+
+ void add(const GlobalVariable *GV, MachineFunction *MF, Register R) {
+ DT.add(GV, MF, R);
+ }
+
+ void add(const Function *F, MachineFunction *MF, Register R) {
+ DT.add(F, MF, R);
+ }
+
+ void add(const Argument *Arg, MachineFunction *MF, Register R) {
+ DT.add(Arg, MF, R);
+ }
+
+ Register find(const Constant *C, MachineFunction *MF) {
+ return DT.find(C, MF);
+ }
+
+ Register find(const GlobalVariable *GV, MachineFunction *MF) {
+ return DT.find(GV, MF);
+ }
+
+ Register find(const Function *F, MachineFunction *MF) {
+ return DT.find(F, MF);
+ }
+
+ void buildDepsGraph(std::vector<SPIRV::DTSortableEntry *> &Graph,
+ MachineModuleInfo *MMI = nullptr) {
+ DT.buildDepsGraph(Graph, MMI);
+ }
+
// Get or create a SPIR-V type corresponding to the given LLVM IR type,
// and map it to the given VReg by creating an ASSIGN_TYPE instruction.
SPIRVType *assignTypeToVReg(
@@ -136,7 +173,7 @@ private:
SPIRVType *getOpTypeFunction(SPIRVType *RetType,
const SmallVectorImpl<SPIRVType *> &ArgTypes,
MachineIRBuilder &MIRBuilder);
- SPIRVType *restOfCreateSPIRVType(Type *LLVMTy, MachineInstrBuilder MIB);
+ SPIRVType *restOfCreateSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType);
public:
Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9294a60506a8..90b921a06f21 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -807,23 +807,29 @@ void SPIRVInstructionSelector::renderImm32(MachineInstrBuilder &MIB,
Register
SPIRVInstructionSelector::buildI32Constant(uint32_t Val, MachineInstr &I,
const SPIRVType *ResType) const {
+ Type *LLVMTy = IntegerType::get(GR.CurMF->getFunction().getContext(), 32);
const SPIRVType *SpvI32Ty =
ResType ? ResType : GR.getOrCreateSPIRVIntegerType(32, I, TII);
- Register NewReg;
- NewReg = MRI->createGenericVirtualRegister(LLT::scalar(32));
- MachineInstr *MI;
- MachineBasicBlock &BB = *I.getParent();
- if (Val == 0) {
- MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
- .addDef(NewReg)
- .addUse(GR.getSPIRVTypeID(SpvI32Ty));
- } else {
- MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
- .addDef(NewReg)
- .addUse(GR.getSPIRVTypeID(SpvI32Ty))
- .addImm(APInt(32, Val).getZExtValue());
+ // Find a constant in DT or build a new one.
+ auto ConstInt = ConstantInt::get(LLVMTy, Val);
+ Register NewReg = GR.find(ConstInt, GR.CurMF);
+ if (!NewReg.isValid()) {
+ NewReg = MRI->createGenericVirtualRegister(LLT::scalar(32));
+ GR.add(ConstInt, GR.CurMF, NewReg);
+ MachineInstr *MI;
+ MachineBasicBlock &BB = *I.getParent();
+ if (Val == 0) {
+ MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
+ .addDef(NewReg)
+ .addUse(GR.getSPIRVTypeID(SpvI32Ty));
+ } else {
+ MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
+ .addDef(NewReg)
+ .addUse(GR.getSPIRVTypeID(SpvI32Ty))
+ .addImm(APInt(32, Val).getZExtValue());
+ }
+ constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
}
- constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
return NewReg;
}
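buildConstantInt, buildConstantFP, and buildI32Constant now share one find-or-build idiom against the tracker. Sketched generically below; findOrBuild and its Build callback are hypothetical, while the find/add calls are the registry entry points added in this patch:

    #include "SPIRVGlobalRegistry.h"

    template <typename KeyT, typename BuildFn>
    static llvm::Register findOrBuild(llvm::SPIRVGlobalRegistry &GR,
                                      const KeyT *Key, llvm::MachineFunction *MF,
                                      BuildFn Build) {
      llvm::Register R = GR.find(Key, MF);
      if (!R.isValid()) {
        R = Build();        // create the vreg and emit the defining instruction
        GR.add(Key, MF, R); // later lookups for (Key, MF) hit this entry
      }
      return R;
    }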
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index fa78dd7942c6..a39df5234935 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -28,6 +28,11 @@ using namespace llvm;
#define DEBUG_TYPE "spirv-module-analysis"
+static cl::opt<bool>
+ SPVDumpDeps("spv-dump-deps",
+ cl::desc("Dump MIR with SPIR-V dependencies info"),
+ cl::Optional, cl::init(false));
+
char llvm::SPIRVModuleAnalysis::ID = 0;
namespace llvm {
@@ -113,6 +118,83 @@ static bool findSameInstrInMS(const MachineInstr &A,
return false;
}
+// Collect the MI which defines the given register in the given machine
+// function.
+static void collectDefInstr(Register Reg, const MachineFunction *MF,
+ SPIRV::ModuleAnalysisInfo *MAI,
+ SPIRV::ModuleSectionType MSType,
+ bool DoInsert = true) {
+ assert(MAI->hasRegisterAlias(MF, Reg) && "Cannot find register alias");
+ MachineInstr *MI = MF->getRegInfo().getUniqueVRegDef(Reg);
+ assert(MI && "There should be an instruction that defines the register");
+ MAI->setSkipEmission(MI);
+ if (DoInsert)
+ MAI->MS[MSType].push_back(MI);
+}
+
+void SPIRVModuleAnalysis::collectGlobalEntities(
+ const std::vector<SPIRV::DTSortableEntry *> &DepsGraph,
+ SPIRV::ModuleSectionType MSType,
+ std::function<bool(const SPIRV::DTSortableEntry *)> Pred,
+ bool UsePreOrder) {
+ DenseSet<const SPIRV::DTSortableEntry *> Visited;
+ for (const auto *E : DepsGraph) {
+ std::function<void(const SPIRV::DTSortableEntry *)> RecHoistUtil;
+ // NOTE: here we prefer the recursive approach over an iterative one
+ // because we don't expect dependency chains long enough to cause a stack
+ // overflow.
+ RecHoistUtil = [MSType, UsePreOrder, &Visited, &Pred,
+ &RecHoistUtil](const SPIRV::DTSortableEntry *E) {
+ if (Visited.count(E) || !Pred(E))
+ return;
+ Visited.insert(E);
+
+ // Traversing the deps graph in post-order lets us avoid a separate
+ // register alias preprocessing pass, but pre-order is required to
+ // process function declarations and their arguments correctly.
+ if (!UsePreOrder)
+ for (auto *S : E->getDeps())
+ RecHoistUtil(S);
+
+ Register GlobalReg = Register::index2VirtReg(MAI.getNextID());
+ bool IsFirst = true;
+ for (auto &U : *E) {
+ const MachineFunction *MF = U.first;
+ Register Reg = U.second;
+ MAI.setRegisterAlias(MF, Reg, GlobalReg);
+ if (!MF->getRegInfo().getUniqueVRegDef(Reg))
+ continue;
+ collectDefInstr(Reg, MF, &MAI, MSType, IsFirst);
+ IsFirst = false;
+ if (E->getIsGV())
+ MAI.GlobalVarList.push_back(MF->getRegInfo().getUniqueVRegDef(Reg));
+ }
+
+ if (UsePreOrder)
+ for (auto *S : E->getDeps())
+ RecHoistUtil(S);
+ };
+ RecHoistUtil(E);
+ }
+}
+
+// This function initializes the global register alias table for types,
+// consts, global vars and func decls, and collects these instructions for
+// output at the module level. It also collects explicit
+// OpExtension/OpCapability instructions.
+void SPIRVModuleAnalysis::processDefInstrs(const Module &M) {
+ std::vector<SPIRV::DTSortableEntry *> DepsGraph;
+
+ GR->buildDepsGraph(DepsGraph, SPVDumpDeps ? MMI : nullptr);
+
+ collectGlobalEntities(
+ DepsGraph, SPIRV::MB_TypeConstVars,
+ [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); }, false);
+
+ collectGlobalEntities(
+ DepsGraph, SPIRV::MB_ExtFuncDecls,
+ [](const SPIRV::DTSortableEntry *E) { return E->getIsFunc(); }, true);
+}
+
// Look for IDs declared with Import linkage, and map the imported name string
// to the register defining that variable (which will usually be the result of
// an OpFunction). This lets us call externally imported functions using
@@ -146,10 +228,9 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
// numbering has already occurred by this point. We can directly compare reg
// arguments when detecting duplicates.
static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
- SPIRV::ModuleSectionType MSType,
- bool IsConstOrType = false) {
+ SPIRV::ModuleSectionType MSType) {
MAI.setSkipEmission(&MI);
- if (findSameInstrInMS(MI, MSType, MAI, IsConstOrType, IsConstOrType ? 1 : 0))
+ if (findSameInstrInMS(MI, MSType, MAI, false))
return; // Found a duplicate, so don't add it.
// No duplicates, so add it.
MAI.MS[MSType].push_back(&MI);
@@ -163,18 +244,11 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
continue;
MachineFunction *MF = MMI->getMachineFunction(*F);
assert(MF);
- unsigned FCounter = 0;
for (MachineBasicBlock &MBB : *MF)
for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == SPIRV::OpFunction)
- FCounter++;
if (MAI.getSkipEmission(&MI))
continue;
const unsigned OpCode = MI.getOpcode();
- const bool IsFuncOrParm =
- OpCode == SPIRV::OpFunction || OpCode == SPIRV::OpFunctionParameter;
- const bool IsConstOrType =
- TII->isConstantInstr(MI) || TII->isTypeDeclInstr(MI);
if (OpCode == SPIRV::OpName || OpCode == SPIRV::OpMemberName) {
collectOtherInstr(MI, MAI, SPIRV::MB_DebugNames);
} else if (OpCode == SPIRV::OpEntryPoint) {
@@ -182,12 +256,6 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
} else if (TII->isDecorationInstr(MI)) {
collectOtherInstr(MI, MAI, SPIRV::MB_Annotations);
collectFuncNames(MI, *F);
- } else if (IsConstOrType || (FCounter > 1 && IsFuncOrParm)) {
- // Now OpSpecConstant*s are not in DT,
- // but they need to be collected anyway.
- enum SPIRV::ModuleSectionType Type =
- IsFuncOrParm ? SPIRV::MB_ExtFuncDecls : SPIRV::MB_TypeConstVars;
- collectOtherInstr(MI, MAI, Type, IsConstOrType);
} else if (OpCode == SPIRV::OpFunction) {
collectFuncNames(MI, *F);
}
@@ -239,6 +307,7 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) {
// TODO: Process type/const/global var/func decl instructions, number their
// destination registers from 0 to N, collect Extensions and Capabilities.
+ processDefInstrs(M);
// Number rest of registers from N+1 onwards.
numberRegistersGlobally(M);
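In an assertions-enabled build, the dependency dump wired up above can be triggered from the command line. A hypothetical invocation (the triple spelling is an assumption; -spv-dump-deps is the option registered above):

    llc -mtriple=spirv64-unknown-unknown -spv-dump-deps -o - input.ll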
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index 1bef13d458c1..585868909d28 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H
#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRVDuplicatesTracker.h"
#include "SPIRVSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -123,6 +124,11 @@ public:
private:
void setBaseInfo(const Module &M);
template <typename T> void collectTypesConstsVars();
+ void collectGlobalEntities(
+ const std::vector<SPIRV::DTSortableEntry *> &DepsGraph,
+ SPIRV::ModuleSectionType MSType,
+ std::function<bool(const SPIRV::DTSortableEntry *)> Pred,
+ bool UsePreOrder);
void processDefInstrs(const Module &M);
void collectFuncNames(MachineInstr &MI, const Function &F);
void processOtherInstrs(const Module &M);
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index d75d41b35838..ee460002fc58 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -44,12 +44,11 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
class SparcMCCodeEmitter : public MCCodeEmitter {
- const MCInstrInfo &MCII;
MCContext &Ctx;
public:
- SparcMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), Ctx(ctx) {}
+ SparcMCCodeEmitter(const MCInstrInfo &, MCContext &ctx)
+ : Ctx(ctx) {}
SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete;
SparcMCCodeEmitter &operator=(const SparcMCCodeEmitter &) = delete;
~SparcMCCodeEmitter() override = default;
@@ -84,12 +83,6 @@ public:
unsigned getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
@@ -97,9 +90,6 @@ private:
void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI);
support::endian::write(OS, Bits,
Ctx.getAsmInfo()->isLittleEndian() ? support::little
@@ -253,7 +243,6 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SparcGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 49b75b7e0bd1..b11c786e7856 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -24,6 +24,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SparcGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 7ef043d9df40..8e6a9ebdb2dd 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -46,6 +46,7 @@ std::unique_ptr<MCObjectTargetWriter> createSparcELFObjectWriter(bool Is64Bit,
// Defines symbolic names for the Sparc instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "SparcGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index f6f9c0a1de81..c8961d507c72 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -250,6 +250,8 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
}
void SparcAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ Sparc_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
switch (MI->getOpcode()) {
default: break;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 242f566da2c9..1a71ff28424f 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -150,23 +150,13 @@ private:
return getPCRelEncoding(MI, OpNum, Fixups,
SystemZ::FK_390_PC24DBL, 3, false);
}
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
-void SystemZMCCodeEmitter::
-encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
+void SystemZMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
MemOpsEmitted = 0;
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
unsigned Size = MCII.get(MI.getOpcode()).getSize();
@@ -329,7 +319,6 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
return 0;
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SystemZGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 03141ecf551d..08886507fdb7 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -23,6 +23,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SystemZGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index db4485423416..f2bfc9ac48e5 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -95,6 +95,7 @@ std::unique_ptr<MCObjectTargetWriter> createSystemZObjectWriter(uint8_t OSABI);
// Defines symbolic names for the SystemZ instructions.
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "SystemZGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 6fb080607f51..1d55bf9a5804 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -143,6 +143,9 @@ void SystemZAsmPrinter::emitCallInformation(CallType CT) {
}
void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ SystemZ_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
switch (MI->getOpcode()) {
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index a7ea5e1e4bf8..fdd82a01f211 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -162,11 +162,7 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
//===----------------------------------------------------------------------===//
// z/OS XPLINK64 callee-saved registers
//===----------------------------------------------------------------------===//
-// %R7D is volatile by the spec, but it must be saved in the prologue by
-// any non-leaf function and restored in the epilogue for use by the
-// return instruction so it functions exactly like a callee-saved register.
-def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
- (sequence "R%dD", 4, 4),
+def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
(sequence "F%dD", 15, 8))>;
def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 43bc7426cfa8..975eb8862e82 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -918,72 +918,74 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ auto &GRRegClass = SystemZ::GR64BitRegClass;
+
+ // For non-leaf functions:
+ // - the address of the callee (entry point), register R6, must be saved.
+ CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
+ CSI.back().setRestored(false);
+
+ // The return address register R7 must be saved and restored.
+ CSI.push_back(CalleeSavedInfo(Regs.getReturnFunctionAddressRegister()));
+
+ // If the function needs a frame pointer, or if the backchain pointer should
+ // be stored, then save the stack pointer register R4.
+ if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
+ CSI.push_back(CalleeSavedInfo(Regs.getStackPointerRegister()));
// Scan the call-saved GPRs and find the bounds of the register spill area.
- unsigned LowGPR = 0;
- int LowOffset = INT32_MAX;
- unsigned HighGPR = LowGPR;
+ Register LowRestoreGPR = 0;
+ int LowRestoreOffset = INT32_MAX;
+ Register LowSpillGPR = 0;
+ int LowSpillOffset = INT32_MAX;
+ Register HighGPR = 0;
int HighOffset = -1;
- unsigned RegSP = Regs.getStackPointerRegister();
- auto &GRRegClass = SystemZ::GR64BitRegClass;
- const unsigned RegSize = 8;
-
- auto ProcessCSI = [&](std::vector<CalleeSavedInfo> &CSIList) {
- for (auto &CS : CSIList) {
- Register Reg = CS.getReg();
- int Offset = RegSpillOffsets[Reg];
- if (Offset >= 0) {
- if (GRRegClass.contains(Reg)) {
- if (LowOffset > Offset) {
- LowOffset = Offset;
- LowGPR = Reg;
- }
+ for (auto &CS : CSI) {
+ Register Reg = CS.getReg();
+ int Offset = RegSpillOffsets[Reg];
+ if (Offset >= 0) {
+ if (GRRegClass.contains(Reg)) {
+ if (LowSpillOffset > Offset) {
+ LowSpillOffset = Offset;
+ LowSpillGPR = Reg;
+ }
+ if (CS.isRestored() && LowRestoreOffset > Offset) {
+ LowRestoreOffset = Offset;
+ LowRestoreGPR = Reg;
+ }
- if (Offset > HighOffset) {
- HighOffset = Offset;
- HighGPR = Reg;
- }
+ if (Offset > HighOffset) {
+ HighOffset = Offset;
+ HighGPR = Reg;
}
+ // Non-volatile GPRs are saved in the dedicated register save area at
+ // the bottom of the stack and are not truly part of the "normal" stack
+ // frame. Mark the frame index as NoAlloc to reflect this.
+ unsigned RegSize = 8;
int FrameIdx = MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
CS.setFrameIdx(FrameIdx);
- } else
- CS.setFrameIdx(INT32_MAX);
+ MFFrame.setStackID(FrameIdx, TargetStackID::NoAlloc);
+ }
+ } else {
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ Align Alignment = TRI->getSpillAlign(*RC);
+ unsigned Size = TRI->getSpillSize(*RC);
+ Alignment = std::min(Alignment, getStackAlign());
+ int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
+ CS.setFrameIdx(FrameIdx);
}
- };
-
- std::vector<CalleeSavedInfo> Spills;
-
- // For non-leaf functions:
- // - the address of callee (entry point) register R6 must be saved
- Spills.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
-
- // If the function needs a frame pointer, or if the backchain pointer should
- // be stored, then save the stack pointer register R4.
- if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
- Spills.push_back(CalleeSavedInfo(RegSP));
+ }
// Save the range of call-saved registers, for use by the
// prologue/epilogue inserters.
- ProcessCSI(CSI);
- MFI->setRestoreGPRRegs(LowGPR, HighGPR, LowOffset);
+ if (LowRestoreGPR)
+ MFI->setRestoreGPRRegs(LowRestoreGPR, HighGPR, LowRestoreOffset);
// Save the range of call-saved registers, for use by the epilogue inserter.
- ProcessCSI(Spills);
- MFI->setSpillGPRRegs(LowGPR, HighGPR, LowOffset);
-
- // Create spill slots for the remaining registers.
- for (auto &CS : CSI) {
- if (CS.getFrameIdx() != INT32_MAX)
- continue;
- Register Reg = CS.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- Align Alignment = TRI->getSpillAlign(*RC);
- unsigned Size = TRI->getSpillSize(*RC);
- Alignment = std::min(Alignment, getStackAlign());
- int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
- CS.setFrameIdx(FrameIdx);
- }
+ assert(LowSpillGPR && "Expected registers to spill");
+ MFI->setSpillGPRRegs(LowSpillGPR, HighGPR, LowSpillOffset);
return true;
}
@@ -1001,13 +1003,6 @@ void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF,
// frame pointer will be clobbered.
if (HasFP)
SavedRegs.set(Regs.getFramePointerRegister());
-
- // If the function is not an XPLeaf function, we need to save the
- // return address register. We also always use that register for
- // the return instruction, so it needs to be restored in the
- // epilogue even though that register is considered to be volatile.
- // #TODO: Implement leaf detection.
- SavedRegs.set(Regs.getReturnFunctionAddressRegister());
}
bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 8f633adbb9ef..29cc2840310d 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -240,6 +240,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalObject *GO,
return SectionKind::getBSS();
}
+ // Global variables with '!exclude' should get the exclude section kind if
+ // they have an explicit section and no other metadata.
+ if (GVar->hasSection())
+ if (MDNode *MD = GVar->getMetadata(LLVMContext::MD_exclude))
+ if (!MD->getNumOperands())
+ return SectionKind::getExclude();
+
// If the global is marked constant, we can put it into a mergable section,
// a mergable string section, or general .data if it contains relocations.
if (GVar->isConstant()) {
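For reference, a global takes this new path when it has an explicit section and an operand-less !exclude metadata node. A minimal sketch of producing such a global through the C++ API; the section and symbol names are illustrative:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void addExcludedGlobal(Module &M) {
      auto *Int32Ty = Type::getInt32Ty(M.getContext());
      auto *GV = new GlobalVariable(M, Int32Ty, /*isConstant=*/true,
                                    GlobalValue::PrivateLinkage,
                                    ConstantInt::get(Int32Ty, 0), "excluded_blob");
      GV->setSection(".foo.metadata"); // an explicit section is required
      GV->setMetadata(LLVMContext::MD_exclude,
                      MDNode::get(M.getContext(), {})); // no operands
    }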
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
index 3eb246f73679..45facd34f84e 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
@@ -39,12 +39,11 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
class VEMCCodeEmitter : public MCCodeEmitter {
- const MCInstrInfo &MCII;
MCContext &Ctx;
public:
- VEMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), Ctx(ctx) {}
+ VEMCCodeEmitter(const MCInstrInfo &, MCContext &ctx)
+ : Ctx(ctx) {}
VEMCCodeEmitter(const VEMCCodeEmitter &) = delete;
VEMCCodeEmitter &operator=(const VEMCCodeEmitter &) = delete;
~VEMCCodeEmitter() override = default;
@@ -74,12 +73,6 @@ public:
uint64_t getRDOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
-
-private:
- FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
- void
- verifyInstructionPredicates(const MCInst &MI,
- const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
@@ -87,9 +80,6 @@ private:
void VEMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
support::endian::write<uint64_t>(OS, Bits, support::little);
@@ -155,7 +145,6 @@ uint64_t VEMCCodeEmitter::getRDOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
-#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "VEGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createVEMCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
index f4fbf763e59c..5a562d77f941 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -24,6 +24,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "VEGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
index d8f9d0634c24..935a0bfc0c4c 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
@@ -44,6 +44,7 @@ std::unique_ptr<MCObjectTargetWriter> createVEELFObjectWriter(uint8_t OSABI);
// Defines symbolic names for the VE instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "VEGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp
index af69d04a17ca..5553087d6f47 100644
--- a/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -325,6 +325,8 @@ void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
}
void VEAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ VE_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
switch (MI->getOpcode()) {
default:
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 85285749b4fa..e54453b31354 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -325,22 +325,22 @@ def VEMEMziiAsmOperand : AsmOperandClass {
// ASX format uses single assembly instruction format.
def MEMrri : Operand<iPTR> {
let PrintMethod = "printMemASXOperand";
- let MIOperandInfo = (ops ptr_rc, ptr_rc, i32imm);
+ let MIOperandInfo = (ops ptr_rc, ptr_rc, i64imm);
let ParserMatchClass = VEMEMrriAsmOperand;
}
def MEMrii : Operand<iPTR> {
let PrintMethod = "printMemASXOperand";
- let MIOperandInfo = (ops ptr_rc, i32imm, i32imm);
+ let MIOperandInfo = (ops ptr_rc, i32imm, i64imm);
let ParserMatchClass = VEMEMriiAsmOperand;
}
def MEMzri : Operand<iPTR> {
let PrintMethod = "printMemASXOperand";
- let MIOperandInfo = (ops i32imm /* = 0 */, ptr_rc, i32imm);
+ let MIOperandInfo = (ops i32imm /* = 0 */, ptr_rc, i64imm);
let ParserMatchClass = VEMEMzriAsmOperand;
}
def MEMzii : Operand<iPTR> {
let PrintMethod = "printMemASXOperand";
- let MIOperandInfo = (ops i32imm /* = 0 */, i32imm, i32imm);
+ let MIOperandInfo = (ops i32imm /* = 0 */, i32imm, i64imm);
let ParserMatchClass = VEMEMziiAsmOperand;
}
diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index d175ad26c742..f334af128162 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -27,6 +27,8 @@
using namespace llvm;
+#define DEBUG_TYPE "ve-register-info"
+
#define GET_REGINFO_TARGET_DESC
#include "VEGenRegisterInfo.inc"
@@ -133,66 +135,179 @@ static unsigned offsetToDisp(MachineInstr &MI) {
return OffDisp;
}
-static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
- MachineInstr &MI, const DebugLoc &dl,
- unsigned FIOperandNum, int Offset, Register FrameReg) {
- // Replace frame index with a frame pointer reference directly.
- // VE has 32 bit offset field, so no need to expand a target instruction.
- // Directly encode it.
+class EliminateFrameIndex {
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const DebugLoc &DL;
+ MachineBasicBlock &MBB;
+ MachineBasicBlock::iterator II;
+ Register clobber;
+
+ // Some helper functions for the ease of instruction building.
+ MachineFunction &getFunc() const { return *MBB.getParent(); }
+ inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const {
+ return TRI.getSubReg(Reg, Idx);
+ }
+ inline const MCInstrDesc &get(unsigned Opcode) const {
+ return TII.get(Opcode);
+ }
+ inline MachineInstrBuilder build(const MCInstrDesc &MCID, Register DestReg) {
+ return BuildMI(MBB, II, DL, MCID, DestReg);
+ }
+ inline MachineInstrBuilder build(unsigned InstOpc, Register DestReg) {
+ return build(get(InstOpc), DestReg);
+ }
+ inline MachineInstrBuilder build(const MCInstrDesc &MCID) {
+ return BuildMI(MBB, II, DL, MCID);
+ }
+ inline MachineInstrBuilder build(unsigned InstOpc) {
+ return build(get(InstOpc));
+ }
+
+ // Calculate the address of a frame index from a frame register and a given
+ // offset if the offset doesn't fit in the immediate field. Use a clobber
+ // register to hold the calculated address.
+ void prepareReplaceFI(MachineInstr &MI, Register &FrameReg, int64_t &Offset,
+ int64_t Bytes = 0);
+ // Replace the frame index in \p MI with a frame register and a given offset
+ // if it fits in the immediate field. Otherwise, use the pre-calculated
+ // address in a clobber register.
+ void replaceFI(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
+
+ // Expand and eliminate the frame index of the STQrii and LDQrii pseudos.
+ void processSTQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
+ void processLDQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
+
+public:
+ EliminateFrameIndex(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+ const DebugLoc &DL, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II)
+ : TII(TII), TRI(TRI), DL(DL), MBB(MBB), II(II), clobber(VE::SX13) {}
+
+ // Expand and eliminate the frame index from MI.
+ void processMI(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
+};
+
+// Prepare the frame index if it doesn't fit in the immediate field. Use the
+// clobber register to hold the calculated address.
+void EliminateFrameIndex::prepareReplaceFI(MachineInstr &MI, Register &FrameReg,
+ int64_t &Offset, int64_t Bytes) {
+ if (isInt<32>(Offset) && isInt<32>(Offset + Bytes)) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it. So, nothing to prepare here.
+ return;
+ }
+
+ // If the offset doesn't fit, emit the following code sequence. This
+ // clobbers SX13, which we always know is available here.
+ // lea %clobber, Offset@lo
+ // and %clobber, %clobber, (32)0
+ // lea.sl %clobber, Offset@hi(FrameReg, %clobber)
+ build(VE::LEAzii, clobber).addImm(0).addImm(0).addImm(Lo_32(Offset));
+ build(VE::ANDrm, clobber).addReg(clobber).addImm(M0(32));
+ build(VE::LEASLrri, clobber)
+ .addReg(clobber)
+ .addReg(FrameReg)
+ .addImm(Hi_32(Offset));
+
+ // Use the clobber register as the frame register with a 0 offset.
+ FrameReg = clobber;
+ Offset = 0;
+}
+
+// Replace the frame index in \p MI with the frame register and byte offset.
+void EliminateFrameIndex::replaceFI(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ assert(isInt<32>(Offset));
+
+ // The offset must be small enough to fit in the immediate field after the
+ // call to prepareReplaceFI. Therefore, we encode it directly.
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset);
}
+void EliminateFrameIndex::processSTQ(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ assert(MI.getOpcode() == VE::STQrii);
+ LLVM_DEBUG(dbgs() << "processSTQ: "; MI.dump());
+
+ prepareReplaceFI(MI, FrameReg, Offset, 8);
+
+ Register SrcReg = MI.getOperand(3).getReg();
+ Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
+ Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
+ // VE stores HiReg to 8(addr) and LoReg to 0(addr)
+ MachineInstr *StMI =
+ build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg(SrcLoReg);
+ replaceFI(*StMI, FrameReg, Offset, 0);
+ // Mutate to 'hi' store.
+ MI.setDesc(get(VE::STrii));
+ MI.getOperand(3).setReg(SrcHiReg);
+ Offset += 8;
+ replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
+void EliminateFrameIndex::processLDQ(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ assert(MI.getOpcode() == VE::LDQrii);
+ LLVM_DEBUG(dbgs() << "processLDQ: "; MI.dump());
+
+ prepareReplaceFI(MI, FrameReg, Offset, 8);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register DestHiReg = getSubReg(DestReg, VE::sub_even);
+ Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
+ // VE loads HiReg from 8(addr) and LoReg from 0(addr)
+ MachineInstr *StMI =
+ build(VE::LDrii, DestLoReg).addReg(FrameReg).addImm(0).addImm(0);
+ replaceFI(*StMI, FrameReg, Offset, 1);
+ MI.setDesc(get(VE::LDrii));
+ MI.getOperand(0).setReg(DestHiReg);
+ Offset += 8;
+ replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
+void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ switch (MI.getOpcode()) {
+ case VE::STQrii:
+ processSTQ(MI, FrameReg, Offset, FIOperandNum);
+ return;
+ case VE::LDQrii:
+ processLDQ(MI, FrameReg, Offset, FIOperandNum);
+ return;
+ }
+ prepareReplaceFI(MI, FrameReg, Offset);
+ replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
- DebugLoc dl = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
MachineFunction &MF = *MI.getParent()->getParent();
- const VEFrameLowering *TFI = getFrameLowering(MF);
+ const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
+ const VEFrameLowering &TFI = *getFrameLowering(MF);
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const VERegisterInfo &TRI = *Subtarget.getRegisterInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ EliminateFrameIndex EFI(TII, TRI, DL, *MI.getParent(), II);
+ // Retrieve FrameReg and byte offset for stack slot.
Register FrameReg;
- int Offset;
- Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
-
+ int64_t Offset =
+ TFI.getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm();
- if (MI.getOpcode() == VE::STQrii) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- Register SrcReg = MI.getOperand(3).getReg();
- Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
- Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
- // VE stores HiReg to 8(addr) and LoReg to 0(addr)
- MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii))
- .addReg(FrameReg)
- .addImm(0)
- .addImm(0)
- .addReg(SrcLoReg);
- replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
- MI.setDesc(TII.get(VE::STrii));
- MI.getOperand(3).setReg(SrcHiReg);
- Offset += 8;
- } else if (MI.getOpcode() == VE::LDQrii) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- Register DestReg = MI.getOperand(0).getReg();
- Register DestHiReg = getSubReg(DestReg, VE::sub_even);
- Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
- // VE loads HiReg from 8(addr) and LoReg from 0(addr)
- MachineInstr *StMI =
- BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg)
- .addReg(FrameReg)
- .addImm(0)
- .addImm(0);
- replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
- MI.setDesc(TII.get(VE::LDrii));
- MI.getOperand(0).setReg(DestHiReg);
- Offset += 8;
- }
-
- replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
+ EFI.processMI(MI, FrameReg, Offset, FIOperandNum);
}
Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const {
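A worked sketch of the displacement split prepareReplaceFI performs, using Lo_32/Hi_32 from llvm/Support/MathExtras.h: offsets that fit in 32 bits are encoded directly, anything wider is rebuilt in the SX13 clobber register by the lea/and/lea.sl sequence shown in the comments above:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    #include <cstdint>

    static void splitDisplacement(int64_t Offset) {
      if (llvm::isInt<32>(Offset))
        return; // fits in the immediate field: nothing to prepare
      uint32_t Lo = llvm::Lo_32(Offset); // lea %s13, Lo ; and %s13, %s13, (32)0
      uint32_t Hi = llvm::Hi_32(Offset); // lea.sl %s13, Hi(FrameReg, %s13)
      assert((((uint64_t)Hi << 32) | Lo) == (uint64_t)Offset);
    }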
diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp
index 330eef4c7c2b..f88f298bc603 100644
--- a/llvm/lib/Target/VE/VVPISelLowering.cpp
+++ b/llvm/lib/Target/VE/VVPISelLowering.cpp
@@ -41,7 +41,7 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
auto VVPOpcodeOpt = getVVPOpcode(Opcode);
if (!VVPOpcodeOpt)
return SDValue();
- unsigned VVPOpcode = VVPOpcodeOpt.getValue();
+ unsigned VVPOpcode = VVPOpcodeOpt.value();
const bool FromVP = ISD::isVPOpcode(Opcode);
// The representative and legalized vector type of this operation.
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index ec72c1de0503..d31715e367ec 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -87,15 +87,14 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc,
if (Stack.empty()) {
return typeError(ErrorLoc,
EVT ? StringRef("empty stack while popping ") +
- WebAssembly::typeToString(EVT.getValue())
+ WebAssembly::typeToString(EVT.value())
: StringRef("empty stack while popping value"));
}
auto PVT = Stack.pop_back_val();
- if (EVT && EVT.getValue() != PVT) {
+ if (EVT && EVT.value() != PVT) {
return typeError(
ErrorLoc, StringRef("popped ") + WebAssembly::typeToString(PVT) +
- ", expected " +
- WebAssembly::typeToString(EVT.getValue()));
+ ", expected " + WebAssembly::typeToString(EVT.value()));
}
return false;
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index f52545a65dbb..97dbc35c991b 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -26,6 +26,7 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mc-target-desc"
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "WebAssemblyGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 75d5d0675990..b5b12200505b 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -124,6 +124,7 @@ enum TOF {
// Defines symbolic names for the WebAssembly instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "WebAssemblyGenInstrInfo.inc"
namespace llvm {
diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index e3daf6bfa72e..ef2c77ade8cc 100644
--- a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -37,4 +37,5 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTargetInfo() {
// which have to be in a shared location between CodeGen and MC.
#define GET_INSTRMAP_INFO 1
#define GET_INSTRINFO_ENUM 1
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "WebAssemblyGenInstrInfo.inc"
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
index 0f1655718481..f380b2582c65 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
@@ -13,6 +13,7 @@
#include "WebAssemblyTypeUtilities.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
// Get register classes enum.
#define GET_REGINFO_ENUM
@@ -168,6 +169,11 @@ wasm::ValType WebAssembly::regClassToValType(unsigned RC) {
}
}
+wasm::ValType WebAssembly::regClassToValType(const TargetRegisterClass *RC) {
+ assert(RC != nullptr);
+ return regClassToValType(RC->getID());
+}
+
void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
const SmallVector<MVT, 1> &VTs) {
assert(!Sym->getType());
@@ -175,33 +181,28 @@ void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
// Tables are represented as Arrays in LLVM IR therefore
// they reach this point as aggregate Array types with an element type
// that is a reference type.
- wasm::ValType Type;
+ wasm::ValType ValTy;
bool IsTable = false;
if (GlobalVT->isArrayTy() &&
WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
- MVT VT;
IsTable = true;
- switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
- case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
- VT = MVT::funcref;
- break;
- case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF:
- VT = MVT::externref;
- break;
- default:
- report_fatal_error("unhandled address space type");
- }
- Type = WebAssembly::toValType(VT);
+ const Type *ElTy = GlobalVT->getArrayElementType();
+ if (WebAssembly::isExternrefType(ElTy))
+ ValTy = wasm::ValType::EXTERNREF;
+ else if (WebAssembly::isFuncrefType(ElTy))
+ ValTy = wasm::ValType::FUNCREF;
+ else
+ report_fatal_error("unhandled reference type");
} else if (VTs.size() == 1) {
- Type = WebAssembly::toValType(VTs[0]);
+ ValTy = WebAssembly::toValType(VTs[0]);
} else
report_fatal_error("Aggregate globals not yet implemented");
if (IsTable) {
Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
- Sym->setTableType(Type);
+ Sym->setTableType(ValTy);
} else {
Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
+ Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(ValTy), /*Mutable=*/true});
}
}
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
index 8fc67d37925c..86211700c70a 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
@@ -22,6 +22,9 @@
#include "llvm/Support/MachineValueType.h"
namespace llvm {
+
+class TargetRegisterClass;
+
namespace WebAssembly {
/// Used as immediate MachineOperands for block signatures
@@ -108,9 +111,12 @@ std::string signatureToString(const wasm::WasmSignature *Sig);
// Convert a MVT into its corresponding wasm ValType.
wasm::ValType toValType(MVT Type);
-// Convert a register class to a wasm ValType.
+// Convert a register class ID to a wasm ValType.
wasm::ValType regClassToValType(unsigned RC);
+// Convert a register class to a wasm ValType.
+wasm::ValType regClassToValType(const TargetRegisterClass *RC);
+
/// Sets a Wasm Symbol Type.
void wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
const SmallVector<MVT, 1> &VTs);
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
index b87c884c9e4a..277bbee83a6f 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
@@ -179,3 +179,25 @@ MachineInstr *WebAssembly::findCatch(MachineBasicBlock *EHPad) {
return &*Pos;
return nullptr;
}
+
+unsigned WebAssembly::getCopyOpcodeForRegClass(const TargetRegisterClass *RC) {
+ assert(RC != nullptr);
+ switch (RC->getID()) {
+ case WebAssembly::I32RegClassID:
+ return WebAssembly::COPY_I32;
+ case WebAssembly::I64RegClassID:
+ return WebAssembly::COPY_I64;
+ case WebAssembly::F32RegClassID:
+ return WebAssembly::COPY_F32;
+ case WebAssembly::F64RegClassID:
+ return WebAssembly::COPY_F64;
+ case WebAssembly::V128RegClassID:
+ return WebAssembly::COPY_V128;
+ case WebAssembly::FUNCREFRegClassID:
+ return WebAssembly::COPY_FUNCREF;
+ case WebAssembly::EXTERNREFRegClassID:
+ return WebAssembly::COPY_EXTERNREF;
+ default:
+ llvm_unreachable("Unexpected register class");
+ }
+}
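The helper above centralizes a register-class-to-opcode switch that three passes previously duplicated (see the WebAssemblyCFGStackify, WebAssemblyInstrInfo, and WebAssemblyPeephole hunks below). A minimal call-site sketch, assuming the usual MachineRegisterInfo/BuildMI setup those passes already have in scope:

    // Hypothetical call site; MBB, MI, TII, MRI, Reg and NewReg come from
    // the enclosing MachineFunction pass, as in the callers updated below.
    const TargetRegisterClass *RC = MRI.getRegClass(Reg);
    unsigned CopyOpc = WebAssembly::getCopyOpcodeForRegClass(RC);
    BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyOpc), NewReg).addReg(Reg);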
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index cdfc758db7ac..d0639208fda9 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -24,6 +24,7 @@ class MachineInstr;
class MachineOperand;
class MCContext;
class MCSymbolWasm;
+class TargetRegisterClass;
class WebAssemblyFunctionInfo;
class WebAssemblySubtarget;
@@ -65,6 +66,9 @@ getOrCreateFuncrefCallTableSymbol(MCContext &Ctx,
/// instruction found or the catch is in an invalid location.
MachineInstr *findCatch(MachineBasicBlock *EHPad);
+/// Returns the appropriate copy opcode for the given register class.
+unsigned getCopyOpcodeForRegClass(const TargetRegisterClass *RC);
+
} // end namespace WebAssembly
} // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 57d51634e849..bcb6cf1b4e1d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -597,6 +597,8 @@ void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
+ WebAssembly_MC::verifyInstructionPredicates(MI->getOpcode(),
+ Subtarget->getFeatureBits());
switch (MI->getOpcode()) {
case WebAssembly::ARGUMENT_i32:
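The verifyInstructionPredicates call inserted above is TableGen-generated; conceptually it asserts that every feature an opcode requires is enabled on the subtarget. A standalone sketch of that check (simplified types; the real bitset and tables come from WebAssemblyGenInstrInfo.inc):

    #include <bitset>
    #include <cassert>

    using FeatureBits = std::bitset<64>;

    struct InstrPredicate {
      FeatureBits Required; // features this opcode needs
    };

    void verifyInstructionPredicates(const InstrPredicate &P,
                                     const FeatureBits &Available) {
      // Every required feature must be present on the subtarget.
      assert((P.Required & Available) == P.Required &&
             "instruction requires a subtarget feature that is not enabled");
      (void)P; (void)Available; // keep NDEBUG builds warning-free
    }

    int main() {
      FeatureBits Avail;
      Avail.set(3); // e.g. a "simd128"-style feature bit
      InstrPredicate P{FeatureBits{}.set(3)};
      verifyInstructionPredicates(P, Avail); // passes
    }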
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 02e873a0f9a6..d2eb4b29e9fd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -781,25 +781,6 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
}
}
-// Get the appropriate copy opcode for the given register class.
-static unsigned getCopyOpcode(const TargetRegisterClass *RC) {
- if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::COPY_I32;
- if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::COPY_I64;
- if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::COPY_F32;
- if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::COPY_F64;
- if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::COPY_V128;
- if (RC == &WebAssembly::FUNCREFRegClass)
- return WebAssembly::COPY_FUNCREF;
- if (RC == &WebAssembly::EXTERNREFRegClass)
- return WebAssembly::COPY_EXTERNREF;
- llvm_unreachable("Unexpected register class");
-}
-
// When MBB is split into MBB and Split, we should unstackify defs in MBB that
// have their uses in Split.
static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
@@ -851,7 +832,8 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
if (!MFI.isVRegStackified(TeeReg)) {
// Now we are not using TEE anymore, so unstackify DefReg too
MFI.unstackifyVReg(DefReg);
- unsigned CopyOpc = getCopyOpcode(MRI.getRegClass(DefReg));
+ unsigned CopyOpc =
+ WebAssembly::getCopyOpcodeForRegClass(MRI.getRegClass(DefReg));
BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), TeeReg)
.addReg(DefReg);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), Reg).addReg(DefReg);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 5484c0db7775..9316826e3d92 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -66,23 +66,7 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
? MRI.getRegClass(DestReg)
: MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg);
- unsigned CopyOpcode;
- if (RC == &WebAssembly::I32RegClass)
- CopyOpcode = WebAssembly::COPY_I32;
- else if (RC == &WebAssembly::I64RegClass)
- CopyOpcode = WebAssembly::COPY_I64;
- else if (RC == &WebAssembly::F32RegClass)
- CopyOpcode = WebAssembly::COPY_F32;
- else if (RC == &WebAssembly::F64RegClass)
- CopyOpcode = WebAssembly::COPY_F64;
- else if (RC == &WebAssembly::V128RegClass)
- CopyOpcode = WebAssembly::COPY_V128;
- else if (RC == &WebAssembly::FUNCREFRegClass)
- CopyOpcode = WebAssembly::COPY_FUNCREF;
- else if (RC == &WebAssembly::EXTERNREFRegClass)
- CopyOpcode = WebAssembly::COPY_EXTERNREF;
- else
- llvm_unreachable("Unexpected register class");
+ unsigned CopyOpcode = WebAssembly::getCopyOpcodeForRegClass(RC);
BuildMI(MBB, I, DL, get(CopyOpcode), DestReg)
.addReg(SrcReg, KillSrc ? RegState::Kill : 0);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 2db4bd822349..7a1a769c6b16 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -553,7 +553,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs();
SizeArg += 1;
if (NEltArg)
- NEltArg = NEltArg.getValue() + 1;
+ NEltArg = NEltArg.value() + 1;
FnAttrs.addAllocSizeAttr(SizeArg, NEltArg);
}
// In case the callee has the 'noreturn' attribute, we need to remove it, because
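The getValue() → value() rename here (and in the Attributor hunk near the end of this patch) tracks llvm::Optional's convergence on the std::optional spelling; behavior is unchanged. A standalone sketch of the same idiom with std::optional:

    #include <iostream>
    #include <optional>

    int main() {
      std::optional<unsigned> NEltArg{3};
      if (NEltArg)
        NEltArg = NEltArg.value() + 1; // formerly getValue() on llvm::Optional
      std::cout << *NEltArg << '\n';   // prints 4
    }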
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 2e6027a5605c..e8b3542df12f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -154,25 +154,6 @@ MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand(
return MCOperand::createExpr(Expr);
}
-// Return the WebAssembly type associated with the given register class.
-static wasm::ValType getType(const TargetRegisterClass *RC) {
- if (RC == &WebAssembly::I32RegClass)
- return wasm::ValType::I32;
- if (RC == &WebAssembly::I64RegClass)
- return wasm::ValType::I64;
- if (RC == &WebAssembly::F32RegClass)
- return wasm::ValType::F32;
- if (RC == &WebAssembly::F64RegClass)
- return wasm::ValType::F64;
- if (RC == &WebAssembly::V128RegClass)
- return wasm::ValType::V128;
- if (RC == &WebAssembly::EXTERNREFRegClass)
- return wasm::ValType::EXTERNREF;
- if (RC == &WebAssembly::FUNCREFRegClass)
- return wasm::ValType::FUNCREF;
- llvm_unreachable("Unexpected register class");
-}
-
static void getFunctionReturns(const MachineInstr *MI,
SmallVectorImpl<wasm::ValType> &Returns) {
const Function &F = MI->getMF()->getFunction();
@@ -221,10 +202,12 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
const MachineRegisterInfo &MRI =
MI->getParent()->getParent()->getRegInfo();
for (const MachineOperand &MO : MI->defs())
- Returns.push_back(getType(MRI.getRegClass(MO.getReg())));
+ Returns.push_back(
+ WebAssembly::regClassToValType(MRI.getRegClass(MO.getReg())));
for (const MachineOperand &MO : MI->explicit_uses())
if (MO.isReg())
- Params.push_back(getType(MRI.getRegClass(MO.getReg())));
+ Params.push_back(
+ WebAssembly::regClassToValType(MRI.getRegClass(MO.getReg())));
// call_indirect instructions have a callee operand at the end which
// doesn't count as a param.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index ba1c4b7233f2..5fcee7af9bde 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
@@ -95,31 +96,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
if (!MFI.isVRegStackified(Reg)) {
unsigned CopyLocalOpc;
const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
- switch (RegClass->getID()) {
- case WebAssembly::I32RegClassID:
- CopyLocalOpc = WebAssembly::COPY_I32;
- break;
- case WebAssembly::I64RegClassID:
- CopyLocalOpc = WebAssembly::COPY_I64;
- break;
- case WebAssembly::F32RegClassID:
- CopyLocalOpc = WebAssembly::COPY_F32;
- break;
- case WebAssembly::F64RegClassID:
- CopyLocalOpc = WebAssembly::COPY_F64;
- break;
- case WebAssembly::V128RegClassID:
- CopyLocalOpc = WebAssembly::COPY_V128;
- break;
- case WebAssembly::FUNCREFRegClassID:
- CopyLocalOpc = WebAssembly::COPY_FUNCREF;
- break;
- case WebAssembly::EXTERNREFRegClassID:
- CopyLocalOpc = WebAssembly::COPY_EXTERNREF;
- break;
- default:
- llvm_unreachable("Unexpected register class for return operand");
- }
+ CopyLocalOpc = WebAssembly::getCopyOpcodeForRegClass(RegClass);
Register NewReg = MRI.createVirtualRegister(RegClass);
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg)
.addReg(Reg);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 388c0f9110b7..0b3e534315d5 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -21,7 +21,6 @@
#include "WebAssemblyRuntimeLibcallSignatures.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
-#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -482,10 +481,13 @@ struct RuntimeLibcallSignatureTable {
}
};
-ManagedStatic<RuntimeLibcallSignatureTable> RuntimeLibcallSignatures;
+RuntimeLibcallSignatureTable &getRuntimeLibcallSignatures() {
+ static RuntimeLibcallSignatureTable RuntimeLibcallSignatures;
+ return RuntimeLibcallSignatures;
+}
// Maps libcall names to their RTLIB::Libcall number. Builds the map in a
-// constructor for use with ManagedStatic
+// constructor for use with a static variable
struct StaticLibcallNameMap {
StringMap<RTLIB::Libcall> Map;
StaticLibcallNameMap() {
@@ -496,7 +498,8 @@ struct StaticLibcallNameMap {
};
for (const auto &NameLibcall : NameLibcalls) {
if (NameLibcall.first != nullptr &&
- RuntimeLibcallSignatures->Table[NameLibcall.second] != unsupported) {
+ getRuntimeLibcallSignatures().Table[NameLibcall.second] !=
+ unsupported) {
assert(Map.find(NameLibcall.first) == Map.end() &&
"duplicate libcall names in name map");
Map[NameLibcall.first] = NameLibcall.second;
@@ -523,7 +526,7 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
wasm::ValType PtrTy =
Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
- auto &Table = RuntimeLibcallSignatures->Table;
+ auto &Table = getRuntimeLibcallSignatures().Table;
switch (Table[LC]) {
case func:
break;
@@ -885,14 +888,14 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
}
}
-static ManagedStatic<StaticLibcallNameMap> LibcallNameMap;
// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
// other than here, just roll its logic into this version.
void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
StringRef Name,
SmallVectorImpl<wasm::ValType> &Rets,
SmallVectorImpl<wasm::ValType> &Params) {
- auto &Map = LibcallNameMap->Map;
+ static StaticLibcallNameMap LibcallNameMap;
+ auto &Map = LibcallNameMap.Map;
auto Val = Map.find(Name);
#ifndef NDEBUG
if (Val == Map.end()) {
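This hunk and the X86 table hunks below all migrate from ManagedStatic<T> to a function-local static, which C++11 already guarantees is initialized lazily and thread-safely ("magic statics"). A standalone illustration of the pattern:

    #include <iostream>
    #include <map>
    #include <string>

    struct ExpensiveTable {
      std::map<std::string, int> Entries;
      ExpensiveTable() : Entries{{"memcpy", 1}, {"memset", 2}} {} // built once
    };

    static ExpensiveTable &getTable() {
      static ExpensiveTable Table; // constructed on first call, thread-safe
      return Table;
    }

    int main() {
      std::cout << getTable().Entries.at("memcpy") << '\n'; // prints 1
    }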
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index a903c5f455a2..da90befb2320 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -622,7 +622,7 @@ static bool printFMAComments(const MCInst *MI, raw_ostream &OS,
OS << '-';
OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' '
- << AccName;
+ << AccName << '\n';
return true;
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
index 901082ce6cf3..640efd468135 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
@@ -13,6 +13,7 @@
#include "X86InstrRelaxTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include <atomic>
using namespace llvm;
@@ -119,7 +120,7 @@ const X86InstrRelaxTableEntry *llvm::lookupRelaxTable(unsigned ShortOp) {
namespace {
// This class stores the short form tables. It is instantiated as a
-// ManagedStatic to lazily init the short form table.
+// function-scope static variable to lazily init the short form table.
struct X86ShortFormTable {
// Stores relaxation table entries sorted by relaxed form opcode.
SmallVector<X86InstrRelaxTableEntry, 0> Table;
@@ -137,10 +138,9 @@ struct X86ShortFormTable {
};
} // namespace
-static ManagedStatic<X86ShortFormTable> ShortTable;
-
const X86InstrRelaxTableEntry *llvm::lookupShortTable(unsigned RelaxOp) {
- auto &Table = ShortTable->Table;
+ static X86ShortFormTable ShortTable;
+ auto &Table = ShortTable.Table;
auto I = llvm::lower_bound(Table, RelaxOp);
if (I != Table.end() && I->KeyOp == RelaxOp)
return &*I;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 49660883ad83..4c962de16530 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -37,6 +37,7 @@ using namespace llvm;
#define GET_INSTRINFO_MC_DESC
#define GET_INSTRINFO_MC_HELPERS
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 7344900f2e31..0ac916527495 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -132,6 +132,9 @@ FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86IndirectThunksPass();
+/// This pass replaces ret instructions with jmps to the __x86_return_thunk.
+FunctionPass *createX86ReturnThunksPass();
+
/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to each other)
FunctionPass *createX86DiscriminateMemOpsPass();
@@ -185,6 +188,7 @@ void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
void initializeX86PreAMXConfigPassPass(PassRegistry &);
void initializeX86LowerTileCopyPass(PassRegistry &);
void initializeX86LowerAMXIntrinsicsLegacyPassPass(PassRegistry &);
+void initializeX86ReturnThunksPass(PassRegistry &);
namespace X86AS {
enum : unsigned {
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a5c6b40c493c..a859176220c7 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -266,6 +266,8 @@ def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
+def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
+ "Support RDPRU instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
@@ -1238,6 +1240,7 @@ def ProcessorFeatures {
TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
+ FeatureRDPRU,
FeatureWBNOINVD];
list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features =
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp
index c7a013a0b17a..cff95d17c14c 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
+#include <atomic>
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 61c1fd25031d..12af6087cb47 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -594,7 +594,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Half type will be promoted by default.
setOperationAction(ISD::FABS, MVT::f16, Promote);
setOperationAction(ISD::FNEG, MVT::f16, Promote);
- setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
@@ -629,6 +629,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall);
setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, LibCall);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
@@ -2817,6 +2845,21 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
AddressSpace = X86AS::FS;
else if (GuardReg == "gs")
AddressSpace = X86AS::GS;
+
+ // Use the symbol guard if the user specified one.
+ StringRef GuardSymb = M->getStackProtectorGuardSymbol();
+ if (!GuardSymb.empty()) {
+ GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
+ if (!GV) {
+ Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
+ : Type::getInt32Ty(M->getContext());
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, GuardSymb, nullptr,
+ GlobalValue::NotThreadLocal, AddressSpace);
+ }
+ return GV;
+ }
+
return SegmentOffset(IRB, Offset, AddressSpace);
}
}
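The new branch reads the module's stack-protector guard symbol and loads the canary from that named global instead of the default segment-offset location. A hedged usage sketch (the symbol name is illustrative; the driver spelling follows clang's stack-protector-guard option family):

    // Build sketch (symbol name illustrative):
    //   clang -fstack-protector-all \
    //         -mstack-protector-guard-symbol=__my_stack_chk_guard guard.c
    // The prologue/epilogue then load and compare this global instead of
    // the default %fs/%gs segment-offset canary.
    unsigned long __my_stack_chk_guard = 0xdeadbeef;

    int main(void) { return 0; }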
@@ -11757,15 +11800,17 @@ static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask,
/// value in ExpectedMask is always accepted. Otherwise the indices must match.
///
/// SM_SentinelZero is accepted as a valid negative index but must match in
-/// both.
+/// both, or via a known bits test.
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask,
+ const SelectionDAG &DAG,
SDValue V1 = SDValue(),
SDValue V2 = SDValue()) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
- assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
+ assert(llvm::all_of(ExpectedMask,
+ [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
"Illegal target shuffle mask");
// Check for out-of-range target shuffle mask indices.
@@ -11778,12 +11823,28 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
V2 = SDValue();
+ APInt ZeroV1 = APInt::getNullValue(Size);
+ APInt ZeroV2 = APInt::getNullValue(Size);
+
for (int i = 0; i < Size; ++i) {
int MaskIdx = Mask[i];
int ExpectedIdx = ExpectedMask[i];
if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
continue;
- if (0 <= MaskIdx && 0 <= ExpectedIdx) {
+ if (MaskIdx == SM_SentinelZero) {
+ // If we need this expected index to be a zero element, then update the
+ // relevant zero mask and perform the known-bits test at the end to
+ // minimize repeated computation.
+ SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
+ if (ExpectedV &&
+ Size == (int)ExpectedV.getValueType().getVectorNumElements()) {
+ int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
+ APInt &ZeroMask = ExpectedIdx < Size ? ZeroV1 : ZeroV2;
+ ZeroMask.setBit(BitIdx);
+ continue;
+ }
+ }
+ if (MaskIdx >= 0) {
SDValue MaskV = MaskIdx < Size ? V1 : V2;
SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
@@ -11791,15 +11852,16 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
continue;
}
- // TODO - handle SM_Sentinel equivalences.
return false;
}
- return true;
+ return (ZeroV1.isNullValue() || DAG.MaskedVectorIsZero(V1, ZeroV1)) &&
+ (ZeroV2.isNullValue() || DAG.MaskedVectorIsZero(V2, ZeroV2));
}
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
-static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
+static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT,
+ const SelectionDAG &DAG) {
if (VT != MVT::v8i32 && VT != MVT::v8f32)
return false;
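The zero-sentinel handling added above defers each required-zero lane into ZeroV1/ZeroV2 and issues a single MaskedVectorIsZero query per operand at the end, instead of one known-bits computation per lane. A standalone sketch of that batching idea:

    #include <array>
    #include <bitset>
    #include <iostream>

    constexpr int kSentinelZero = -2; // stand-in for SM_SentinelZero

    // One predicate call checks every deferred lane at once, mirroring the
    // single masked-is-zero query at the end of the function above.
    bool lanesAreZero(const std::array<int, 4> &V, const std::bitset<4> &Need) {
      for (int i = 0; i < 4; ++i)
        if (Need[i] && V[i] != 0)
          return false;
      return true;
    }

    int main() {
      std::array<int, 4> Vec = {0, 7, 0, 9};
      std::array<int, 4> Mask = {kSentinelZero, 1, kSentinelZero, 3};
      std::bitset<4> NeedZero;
      for (int i = 0; i < 4; ++i)
        if (Mask[i] == kSentinelZero)
          NeedZero.set(i); // defer the check rather than test per lane
      std::cout << (lanesAreZero(Vec, NeedZero) ? "match\n" : "no match\n");
    }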
@@ -11809,12 +11871,13 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
SmallVector<int, 8> Unpckhwd;
createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
/* Unary = */ false);
- bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) ||
- isTargetShuffleEquivalent(VT, Mask, Unpckhwd));
+ bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd, DAG) ||
+ isTargetShuffleEquivalent(VT, Mask, Unpckhwd, DAG));
return IsUnpackwdMask;
}
-static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
+static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask,
+ const SelectionDAG &DAG) {
// Create 128-bit vector type based on mask size.
MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
MVT VT = MVT::getVectorVT(EltVT, Mask.size());
@@ -11827,8 +11890,8 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
for (unsigned i = 0; i != 4; ++i) {
SmallVector<int, 16> UnpackMask;
createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
- if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) ||
- isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask))
+ if (isTargetShuffleEquivalent(VT, Mask, UnpackMask, DAG) ||
+ isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask, DAG))
return true;
}
return false;
@@ -12021,7 +12084,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
// Attempt to match the target mask against the unpack lo/hi mask patterns.
SmallVector<int, 64> Unpckl, Unpckh;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1,
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG, V1,
(IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKL;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
@@ -12030,7 +12093,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
}
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1,
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG, V1,
(IsUnary ? V1 : V2))) {
UnpackOpcode = X86ISD::UNPCKH;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
@@ -12069,14 +12132,14 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
// If a binary shuffle, commute and try again.
if (!IsUnary) {
ShuffleVectorSDNode::commuteMask(Unpckl);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG)) {
UnpackOpcode = X86ISD::UNPCKL;
std::swap(V1, V2);
return true;
}
ShuffleVectorSDNode::commuteMask(Unpckh);
- if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
+ if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG)) {
UnpackOpcode = X86ISD::UNPCKH;
std::swap(V1, V2);
return true;
@@ -12464,14 +12527,14 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
// Try binary shuffle.
SmallVector<int, 32> BinaryMask;
createPackShuffleMask(VT, BinaryMask, false, NumStages);
- if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2))
+ if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, DAG, V1, V2))
if (MatchPACK(V1, V2, PackVT))
return true;
// Try unary shuffle.
SmallVector<int, 32> UnaryMask;
createPackShuffleMask(VT, UnaryMask, true, NumStages);
- if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1))
+ if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, DAG, V1))
if (MatchPACK(V1, V1, PackVT))
return true;
}
@@ -14283,7 +14346,7 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
// and a simple narrow shuffle. Prefer extract+unpack(h/l)ps to vpermps
// because that avoids a constant load from memory.
if (NumElts == 4 &&
- (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask)))
+ (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask, DAG)))
return SDValue();
// Extend the shuffle mask with undef elements.
@@ -17230,7 +17293,7 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
if (Subtarget.hasAVX2()) {
// extract128 + vunpckhps/vshufps, is better than vblend + vpermps.
if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
- !is128BitUnpackShuffleMask(HalfMask) &&
+ !is128BitUnpackShuffleMask(HalfMask, DAG) &&
(!isSingleSHUFPSMask(HalfMask) ||
Subtarget.hasFastVariableCrossLaneShuffle()))
return SDValue();
@@ -17892,7 +17955,7 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// For non-AVX512 if the Mask is of 16bit elements in lane then try to split
// since after split we get a more efficient code using vpunpcklwd and
// vpunpckhwd instrs than vblend.
- if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
+ if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32, DAG))
return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget,
DAG);
@@ -17930,7 +17993,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// For non-AVX512 if the Mask is of 16bit elements in lane then try to split
// since after split we get a more efficient code than vblend by using
// vpunpcklwd and vpunpckhwd instrs.
- if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
+ if (isUnpackWdShuffleMask(Mask, MVT::v8i32, DAG) && !V2.isUndef() &&
!Subtarget.hasAVX512())
return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget,
DAG);
@@ -27887,11 +27950,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
// Read Performance Monitoring Counters.
case RDPMC:
+ // Read Processor Register.
+ case RDPRU:
// GetExtended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
// RDPMC uses ECX to select the index of the performance counter to read.
+ // RDPRU uses ECX to select the processor register to read.
// XGETBV uses ECX to select the index of the XCR register to return.
// The result is stored into registers EDX:EAX.
expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
@@ -29902,14 +29968,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt);
SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
{4, 5, 6, 7, -1, -1, -1, -1});
- Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
- {0, 1, 1, 1, -1, -1, -1, -1});
- Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01,
- {2, 3, 3, 3, -1, -1, -1, -1});
- Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23,
- {0, 1, 1, 1, -1, -1, -1, -1});
- Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23,
- {2, 3, 3, 3, -1, -1, -1, -1});
+ SDValue Msk02 = getV4X86ShuffleImm8ForMask({0, 1, 1, 1}, dl, DAG);
+ SDValue Msk13 = getV4X86ShuffleImm8ForMask({2, 3, 3, 3}, dl, DAG);
+ Amt0 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk02);
+ Amt1 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk13);
+ Amt2 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk02);
+ Amt3 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk13);
}
}
@@ -30797,6 +30861,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::UMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
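Marking FMax/FMin as CmpXChg means the IR-level expander rewrites the atomicrmw into a compare-exchange loop. A standalone sketch of the shape of that loop (std::max is not NaN-faithful to the IR semantics; illustrative only):

    #include <algorithm>
    #include <atomic>
    #include <iostream>

    float atomic_fmax(std::atomic<float> &A, float V) {
      float Old = A.load();
      // On failure, compare_exchange_weak refreshes Old; retry until the
      // swap lands, which is exactly the cmpxchg loop the expansion produces.
      while (!A.compare_exchange_weak(Old, std::max(Old, V)))
        ;
      return Old;
    }

    int main() {
      std::atomic<float> A{1.5f};
      atomic_fmax(A, 2.25f);
      std::cout << A.load() << '\n'; // 2.25
    }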
@@ -32894,6 +32960,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget,
Results);
return;
+ case Intrinsic::x86_rdpru:
+ expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget,
+ Results);
+ return;
case Intrinsic::x86_xgetbv:
expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
Results);
@@ -36985,8 +37055,9 @@ static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
- SDValue V1, const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
+ SDValue V1, const SelectionDAG &DAG,
+ const X86Subtarget &Subtarget, unsigned &Shuffle,
+ MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
@@ -37057,17 +37128,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// instructions are no slower than UNPCKLPD but has the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, DAG, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
@@ -37076,17 +37147,19 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v4f64;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
+ V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, DAG,
+ V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
@@ -37096,21 +37169,22 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG,
+ V1)) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
MaskVT, Mask,
- {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) {
+ {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, DAG, V1)) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
MaskVT, Mask,
- {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) {
+ {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, DAG, V1)) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
@@ -37126,6 +37200,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain, bool AllowIntDomain,
+ const SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
@@ -37269,33 +37344,36 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) &&
+ AllowFloatDomain) {
V2 = V1;
V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) {
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}, DAG) &&
+ AllowFloatDomain) {
V2 = V1;
Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) &&
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}, DAG) &&
Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
SrcVT = DstVT = MVT::v2f64;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) &&
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG) &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
SrcVT = DstVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}) &&
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7},
+ DAG) &&
Subtarget.hasFP16()) {
Shuffle = X86ISD::MOVSH;
SrcVT = DstVT = MVT::v8f16;
@@ -37678,7 +37756,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
scaleShuffleElements(Mask, NumElts, ScaledMask)) {
for (unsigned i = 0; i != NumElts; ++i)
IdentityMask.push_back(i);
- if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2))
+ if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, DAG, V1,
+ V2))
return CanonicalizeShuffleInput(RootVT, V1);
}
}
@@ -37902,7 +37981,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1,
- Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
+ DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
@@ -37913,7 +37992,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
+ AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
@@ -37931,7 +38010,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// TODO: Handle other insertions here as well?
if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
Subtarget.hasSSE41() &&
- !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) {
+ !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG)) {
if (MaskEltSizeInBits == 32) {
SDValue SrcV1 = V1, SrcV2 = V2;
if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask,
@@ -37947,12 +38026,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
if (MaskEltSizeInBits == 64 &&
- isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) &&
+ isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}, DAG) &&
V2.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V2.getScalarValueSizeInBits() <= 32) {
if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
return SDValue(); // Nothing to do!
- PermuteImm = (/*DstIdx*/2 << 4) | (/*SrcIdx*/0 << 0);
+ PermuteImm = (/*DstIdx*/ 2 << 4) | (/*SrcIdx*/ 0 << 0);
Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
CanonicalizeShuffleInput(MVT::v4f32, V1),
CanonicalizeShuffleInput(MVT::v4f32, V2),
@@ -51654,9 +51733,13 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
- if ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX()) ||
- (OpSize == 512 && Subtarget.useAVX512Regs())) {
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs()))) {
bool HasPT = Subtarget.hasSSE41();
// PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index a55b95960aa6..6124755ca539 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1532,44 +1532,6 @@ def : Pat<(xor GR32:$src1, -2147483648),
}
//===----------------------------------------------------------------------===//
-// Pattern match SUB as XOR
-//===----------------------------------------------------------------------===//
-
-// An immediate in the LHS of a subtract can't be encoded in the instruction.
-// If there is no possibility of a borrow we can use an XOR instead of a SUB
-// to enable the immediate to be folded.
-// TODO: Move this to a DAG combine?
-
-def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1));
-
- // If all possible ones in the RHS are set in the LHS then there can't be
- // a borrow and we can use xor.
- return (~Known.Zero).isSubsetOf(CN->getAPIntValue());
- }
-
- return false;
-}]>;
-
-let AddedComplexity = 5 in {
-def : Pat<(sub_is_xor imm:$src2, GR8:$src1),
- (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i16immSExt8:$src2, GR16:$src1),
- (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub_is_xor imm:$src2, GR16:$src1),
- (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i32immSExt8:$src2, GR32:$src1),
- (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(sub_is_xor imm:$src2, GR32:$src1),
- (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i64immSExt8:$src2, GR64:$src1),
- (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub_is_xor i64immSExt32:$src2, GR64:$src1),
- (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-}
-
-//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
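For reference, the patterns deleted above relied on the identity that when every bit that can be set in X is also set in the constant C, the subtraction C - X cannot borrow, so C - X == C ^ X (which lets the immediate be folded into an XOR). A standalone exhaustive check of that identity:

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint32_t C = 0xFF; // constant LHS; X's possible ones lie within C
      for (uint32_t X = 0; X <= 0xFF; ++X)
        if (C - X != (C ^ X)) {
          std::cout << "mismatch at " << X << '\n';
          return 1;
        }
      std::cout << "C - X == C ^ X holds for all X with bits inside C\n";
    }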
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 52b2a62316cd..c4317be664fd 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -13,8 +13,8 @@
#include "X86InstrFMA3Info.h"
#include "X86InstrInfo.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
+#include <atomic>
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 27220a8d4d99..8aeb169929f2 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -13,6 +13,7 @@
#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include <atomic>
#include <vector>
using namespace llvm;
@@ -6102,7 +6103,7 @@ llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
namespace {
// This class stores the memory unfolding tables. It is instantiated as a
-// ManagedStatic to lazily init the unfolding table.
+// function-scope static variable to lazily init the unfolding table.
struct X86MemUnfoldTable {
// Stores memory unfolding tables entries sorted by opcode.
std::vector<X86MemoryFoldTableEntry> Table;
@@ -6159,11 +6160,10 @@ struct X86MemUnfoldTable {
};
}
-static ManagedStatic<X86MemUnfoldTable> MemUnfoldTable;
-
const X86MemoryFoldTableEntry *
llvm::lookupUnfoldTable(unsigned MemOp) {
- auto &Table = MemUnfoldTable->Table;
+ static X86MemUnfoldTable MemUnfoldTable;
+ auto &Table = MemUnfoldTable.Table;
auto I = llvm::lower_bound(Table, MemOp);
if (I != Table.end() && I->KeyOp == MemOp)
return &*I;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 7f6ef3479d40..4a9a281d5b99 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -978,6 +978,7 @@ def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
+def HasRDPRU : Predicate<"Subtarget->hasRDPRU()">;
def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">;
def HasCX8 : Predicate<"Subtarget->hasCX8()">;
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 3a653a56e534..b1ca87279007 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -735,6 +735,15 @@ def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
} // SchedRW
//===----------------------------------------------------------------------===//
+// RDPRU - Read Processor Register instruction.
+
+let SchedRW = [WriteSystem] in {
+let Uses = [ECX], Defs = [EAX, EDX] in
+ def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, PS,
+ Requires<[HasRDPRU]>;
+}
+
+//===----------------------------------------------------------------------===//
// Platform Configuration instruction
// From ISA docs:
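Per the Uses/Defs on the RDPRU definition above, the instruction selects a register with ECX and returns a 64-bit result in EDX:EAX. A hedged end-user sketch via inline asm (requires an AMD CPU with the rdpru feature, Zen 2 or later, and an assembler that knows the mnemonic; 0 = MPERF, 1 = APERF per AMD's documentation):

    #include <cstdint>
    #include <iostream>

    // ECX selects the register; the result comes back in EDX:EAX.
    static inline uint64_t rdpru(uint32_t Reg) {
      uint32_t Lo, Hi;
      __asm__ volatile("rdpru" : "=a"(Lo), "=d"(Hi) : "c"(Reg));
      return ((uint64_t)Hi << 32) | Lo;
    }

    int main() { std::cout << rdpru(1) << '\n'; } // APERF reading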
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 3c8be95b43e3..6112c0b7d6c3 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t {
TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, GATHER_AVX2,
- ROUNDP, ROUNDS
+ ROUNDP, ROUNDS, RDPRU
};
struct IntrinsicData {
@@ -309,6 +309,7 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0),
+ X86_INTRINSIC_DATA(rdpru, RDPRU, X86::RDPRU, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index b107de692365..3fbdb18a0793 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2413,6 +2413,10 @@ static void addConstantComments(const MachineInstr *MI,
}
void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // FIXME: Enable feature predicate checks once all the tests pass.
+ // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
+ // Subtarget->getFeatureBits());
+
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI =
MF->getSubtarget<X86Subtarget>().getRegisterInfo();
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
index 7761f7323358..c760a32e2579 100644
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -439,8 +439,8 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
- if (!Visited.insert(V).second)
- continue;
+ if (!Visited.insert(V).second)
+ continue;
if (auto *PN = dyn_cast<PHINode>(V)) {
// PHI node should have a single use unless it is the root node, then it
@@ -466,7 +466,7 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
// gets us back to this node.
if (BO->hasNUses(BO == Root ? 3 : 2)) {
PHINode *PN = nullptr;
- for (auto *U : Root->users())
+ for (auto *U : BO->users())
if (auto *P = dyn_cast<PHINode>(U))
if (!Visited.count(P))
PN = P;
diff --git a/llvm/lib/Target/X86/X86ReturnThunks.cpp b/llvm/lib/Target/X86/X86ReturnThunks.cpp
new file mode 100644
index 000000000000..4b203229ba83
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ReturnThunks.cpp
@@ -0,0 +1,92 @@
+//==- X86ReturnThunks.cpp - Replace rets with thunks or inline thunks --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass that replaces ret instructions with a jmp to __x86_return_thunk.
+///
+/// This corresponds to -mfunction-return=thunk-extern or
+/// __attribute__((function_return("thunk-extern"))).
+///
+/// This pass is a minimal implementation necessary to help mitigate
+/// RetBleed for the Linux kernel.
+///
+/// Should support for thunk or thunk-inline become necessary in the future,
+/// this pass should be combined with x86-retpoline-thunks, which already has
+/// machinery to emit thunks. Until then, YAGNI.
+///
+/// This pass is very similar to x86-lvi-ret.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-return-thunks"
+#define DEBUG_TYPE PASS_KEY
+
+struct X86ReturnThunks final : public MachineFunctionPass {
+ static char ID;
+ X86ReturnThunks() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override { return "X86 Return Thunks"; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+char X86ReturnThunks::ID = 0;
+
+bool X86ReturnThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << "\n");
+
+ bool Modified = false;
+
+ if (!MF.getFunction().hasFnAttribute(llvm::Attribute::FnRetThunkExtern))
+ return Modified;
+
+ StringRef ThunkName = "__x86_return_thunk";
+ if (MF.getFunction().getName() == ThunkName)
+ return Modified;
+
+ const auto &ST = MF.getSubtarget<X86Subtarget>();
+ const bool Is64Bit = ST.getTargetTriple().getArch() == Triple::x86_64;
+ const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32;
+ SmallVector<MachineInstr *, 16> Rets;
+
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &Term : MBB.terminators())
+ if (Term.getOpcode() == RetOpc)
+ Rets.push_back(&Term);
+
+ const MCInstrDesc &JMP = ST.getInstrInfo()->get(X86::TAILJMPd);
+
+ for (MachineInstr *Ret : Rets) {
+ BuildMI(Ret->getParent(), Ret->getDebugLoc(), JMP)
+ .addExternalSymbol(ThunkName.data());
+ Ret->eraseFromParent();
+ Modified = true;
+ }
+
+ return Modified;
+}
+
+INITIALIZE_PASS(X86ReturnThunks, PASS_KEY, "X86 Return Thunks", false, false)
+
+FunctionPass *llvm::createX86ReturnThunksPass() {
+ return new X86ReturnThunks();
+}
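As the file header notes, the pass fires on functions carrying the fn_ret_thunk_extern attribute; something must then provide the __x86_return_thunk symbol (the Linux kernel supplies its own). A minimal usage sketch from C:

    // Build sketch: clang -mfunction-return=thunk-extern ret.c
    // (or annotate individual functions as below); link against an object
    // that defines __x86_return_thunk, as the kernel does.
    __attribute__((function_return("thunk-extern")))
    void foo(void) {}

    int main(void) { foo(); return 0; }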
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 4249788e3540..f4e25e4194db 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -100,6 +100,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86OptimizeLEAPassPass(PR);
initializeX86PartialReductionPass(PR);
initializePseudoProbeInserterPass(PR);
+ initializeX86ReturnThunksPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -575,6 +576,7 @@ void X86PassConfig::addPreEmitPass2() {
// hand inspection of the codegen output.
addPass(createX86SpeculativeExecutionSideEffectSuppression());
addPass(createX86IndirectThunksPass());
+ addPass(createX86ReturnThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index c286b747a271..a782ff436dc0 100644
--- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -29,6 +29,7 @@
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "XCoreGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 096b22415a22..ec4418333859 100644
--- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -22,6 +22,7 @@
// Defines symbolic names for the XCore instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "XCoreGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8fea61d125d2..691fdf16bc0f 100644
--- a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -256,6 +256,9 @@ bool XCoreAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void XCoreAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ XCore_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
+
SmallString<128> Str;
raw_svector_ostream O(Str);
diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td
index 0d97f77e525f..9d969b040ef2 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/Options.td
+++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td
@@ -44,5 +44,7 @@ def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias<help>;
//==============================================================================
def ltcg : F<"ltcg">;
+def nodefaultlib : P<"nodefaultlib", "">;
+def nodefaultlib_all : F<"nodefaultlib">;
def nologo : F<"nologo">;
def subsystem : P<"subsystem", "">;
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index d09607bb1c4c..51eb8ebf0369 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -881,16 +881,16 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
dwarf::DW_ATE_float,
llvm::DINode::FlagArtificial);
} else if (Ty->isPointerTy()) {
- // Construct BasicType instead of PointerType to avoid infinite
- // search problem.
- // For example, we would be in trouble if we traverse recursively:
+ // Construct a PointerType pointing to null (aka void *) instead of
+ // exploring the pointee type, to avoid the infinite search problem. For
+ // example, we would be in trouble if we traversed recursively:
//
// struct Node {
// Node* ptr;
// };
- RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
- dwarf::DW_ATE_address,
- llvm::DINode::FlagArtificial);
+ RetType = Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
+ Layout.getABITypeAlignment(Ty),
+ /*DWARFAddressSpace=*/None, Name);
} else if (Ty->isStructTy()) {
auto *DIStruct = Builder.createStructType(
Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
@@ -914,13 +914,21 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
RetType = DIStruct;
} else {
- LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";);
- SmallString<32> Buffer;
- raw_svector_ostream OS(Buffer);
- OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty);
- RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty),
- dwarf::DW_ATE_address,
- llvm::DINode::FlagArtificial);
+ LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
+ TypeSize Size = Layout.getTypeSizeInBits(Ty);
+ auto *CharSizeType = Builder.createBasicType(
+ Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);
+
+ if (Size <= 8)
+ RetType = CharSizeType;
+ else {
+ if (Size % 8 != 0)
+ Size = TypeSize::Fixed(Size + 8 - (Size % 8));
+
+ RetType = Builder.createArrayType(
+ Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
+ Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
+ }
}
DITypeCache.insert({Ty, RetType});
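
The new fallback models an unresolved type as an array of 8-bit characters, rounding the bit size up to the next byte boundary before computing the element count. A quick standalone check of that rounding, with TypeSize::Fixed replaced by a plain integer for illustration:

#include <cassert>
#include <cstdint>

// Round a bit size up to the next byte multiple, as the fallback above does.
uint64_t roundUpToByte(uint64_t SizeInBits) {
  if (SizeInBits % 8 != 0)
    SizeInBits += 8 - (SizeInBits % 8);
  return SizeInBits;
}

int main() {
  assert(roundUpToByte(1) == 8);
  assert(roundUpToByte(8) == 8);
  assert(roundUpToByte(33) == 40);
  assert(roundUpToByte(33) / 8 == 5); // element count of the i8 array type
}
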
@@ -971,7 +979,8 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
unsigned LineNum = PromiseDIVariable->getLine();
DICompositeType *FrameDITy = DBuilder.createStructType(
- DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8,
+ DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
+ DFile, LineNum, Shape.FrameSize * 8,
Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
llvm::DINodeArray());
StructType *FrameTy = Shape.FrameTy;
@@ -995,14 +1004,12 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
*IndexTy = FrameTy->getElementType(IndexIndex);
DenseMap<unsigned, DIType *> TyCache;
- TyCache.insert({ResumeIndex,
- DBuilder.createBasicType("__resume_fn",
- Layout.getTypeSizeInBits(ResumeFnTy),
- dwarf::DW_ATE_address)});
TyCache.insert(
- {DestroyIndex, DBuilder.createBasicType(
- "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy),
- dwarf::DW_ATE_address)});
+ {ResumeIndex, DBuilder.createPointerType(
+ nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
+ TyCache.insert(
+ {DestroyIndex, DBuilder.createPointerType(
+ nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});
/// FIXME: If we fill the field `SizeInBits` with the actual size of
/// __coro_index in bits, then __coro_index wouldn't show in the debugger.
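
The struct Node example above is the whole motivation for not exploring pointee types: a walker that chases pointees never terminates on a self-referential type. A toy, self-contained illustration; ToyType is a made-up stand-in for the real type system:

#include <cassert>
#include <cstddef>

// Toy model of the hazard in the comment above (struct Node { Node *ptr; }).
struct ToyType {
  bool IsPointer = false;
  const ToyType *Pointee = nullptr;
};

// A walker that chases pointee types never terminates on a self-referential
// type; it only stops here because of the artificial depth cap.
size_t naiveDepth(const ToyType *T, size_t Cap = 1000) {
  size_t D = 0;
  while (T && T->IsPointer && D < Cap) {
    T = T->Pointee;
    ++D;
  }
  return D;
}

// Patched behavior: never chase the pointee; describe any pointer as void*.
size_t patchedDepth(const ToyType *T) { return (T && T->IsPointer) ? 1 : 0; }

int main() {
  ToyType Ptr;
  Ptr.IsPointer = true;
  Ptr.Pointee = &Ptr; // the Node::ptr cycle in miniature
  assert(naiveDepth(&Ptr) == 1000); // unbounded without the cap
  assert(patchedDepth(&Ptr) == 1);  // finite by construction
}
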
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index ead552d9be4e..9c1b247cdb39 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -389,7 +389,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
// Replace CoroSave with a store to Index:
// %index.addr = getelementptr %f.frame... (index field number)
- // store i32 0, i32* %index.addr1
+ // store i32 %IndexVal, i32* %index.addr1
auto *Save = S->getCoroSave();
Builder.SetInsertPoint(Save);
if (S->isFinal()) {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index b05b7990e3f0..e5ff98e4f73f 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -718,8 +718,8 @@ Argument *IRPosition::getAssociatedArgument() const {
}
// If we found a unique callback candidate argument, return it.
- if (CBCandidateArg && CBCandidateArg.getValue())
- return CBCandidateArg.getValue();
+ if (CBCandidateArg && CBCandidateArg.value())
+ return CBCandidateArg.value();
// If no callbacks were found, or none used the underlying call site operand
// exclusively, use the direct callee argument if available.
@@ -1048,11 +1048,11 @@ Attributor::getAssumedConstant(const IRPosition &IRP,
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return llvm::None;
}
- if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue())) {
+ if (isa_and_nonnull<UndefValue>(SimplifiedV.value())) {
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return UndefValue::get(IRP.getAssociatedType());
}
- Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.getValue());
+ Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.value());
if (CI)
CI = dyn_cast_or_null<Constant>(
AA::getWithType(*CI, *IRP.getAssociatedType()));
@@ -2697,8 +2697,8 @@ void InformationCache::initializeInformationCache(const Function &CF,
Optional<short> &NumUses = AssumeUsesMap[I];
if (!NumUses)
NumUses = I->getNumUses();
- NumUses = NumUses.getValue() - /* this assume */ 1;
- if (NumUses.getValue() != 0)
+ NumUses = NumUses.value() - /* this assume */ 1;
+ if (NumUses.value() != 0)
continue;
AssumeOnlyValues.insert(I);
for (const Value *Op : I->operands())
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 4d99ce7e3175..1ff54b78e27e 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -437,7 +437,7 @@ static bool genericValueTraversal(
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
if (!SimpleV)
continue;
- Value *NewV = SimpleV.getValue();
+ Value *NewV = SimpleV.value();
if (NewV && NewV != V) {
if ((VS & AA::Interprocedural) || !CtxI ||
AA::isValidInScope(*NewV, CtxI->getFunction())) {
@@ -1891,14 +1891,14 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
// Check if we have an assumed unique return value that we could manifest.
Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
- if (!UniqueRV || !UniqueRV.getValue())
+ if (!UniqueRV || !UniqueRV.value())
return Changed;
// Bookkeeping.
STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
"Number of function with unique return");
// If the assumed unique return value is an argument, annotate it.
- if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
+ if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.value())) {
if (UniqueRVArg->getType()->canLosslesslyBitCastTo(
getAssociatedFunction()->getReturnType())) {
getIRPosition() = IRPosition::argument(*UniqueRVArg);
@@ -2666,9 +2666,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// Either we stopped and the appropriate action was taken,
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I);
- if (!SimplifiedPtrOp || !SimplifiedPtrOp.getValue())
+ if (!SimplifiedPtrOp || !SimplifiedPtrOp.value())
return true;
- const Value *PtrOpVal = SimplifiedPtrOp.getValue();
+ const Value *PtrOpVal = SimplifiedPtrOp.value();
// A memory access through a pointer is considered UB
// only if the pointer has constant null value.
@@ -2757,14 +2757,14 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
IRPosition::value(*ArgVal), *this, UsedAssumedInformation);
if (UsedAssumedInformation)
continue;
- if (SimplifiedVal && !SimplifiedVal.getValue())
+ if (SimplifiedVal && !SimplifiedVal.value())
return true;
- if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.getValue())) {
+ if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.value())) {
KnownUBInsts.insert(&I);
continue;
}
if (!ArgVal->getType()->isPointerTy() ||
- !isa<ConstantPointerNull>(*SimplifiedVal.getValue()))
+ !isa<ConstantPointerNull>(*SimplifiedVal.value()))
continue;
auto &NonNullAA =
A.getAAFor<AANonNull>(*this, CalleeArgumentIRP, DepClassTy::NONE);
@@ -4101,11 +4101,11 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation);
- if (!C || isa_and_nonnull<UndefValue>(C.getValue())) {
+ if (!C || isa_and_nonnull<UndefValue>(C.value())) {
// No value yet, assume all edges are dead.
- } else if (isa_and_nonnull<ConstantInt>(C.getValue())) {
+ } else if (isa_and_nonnull<ConstantInt>(C.value())) {
for (auto &CaseIt : SI.cases()) {
- if (CaseIt.getCaseValue() == C.getValue()) {
+ if (CaseIt.getCaseValue() == C.value()) {
AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
return UsedAssumedInformation;
}
@@ -5523,11 +5523,10 @@ struct AAValueSimplifyImpl : AAValueSimplify {
if (!SimpleV)
return PoisonValue::get(&Ty);
Value *EffectiveV = &V;
- if (SimpleV.getValue())
- EffectiveV = SimpleV.getValue();
+ if (SimpleV.value())
+ EffectiveV = SimpleV.value();
if (auto *C = dyn_cast<Constant>(EffectiveV))
- if (!C->canTrap())
- return C;
+ return C;
if (CtxI && AA::isValidAtPosition(AA::ValueAndContext(*EffectiveV, *CtxI),
A.getInfoCache()))
return ensureType(A, *EffectiveV, Ty, CtxI, Check);
@@ -5541,7 +5540,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
/// nullptr if we don't have one that makes sense.
Value *manifestReplacementValue(Attributor &A, Instruction *CtxI) const {
Value *NewV = SimplifiedAssociatedValue
- ? SimplifiedAssociatedValue.getValue()
+ ? SimplifiedAssociatedValue.value()
: UndefValue::get(getAssociatedType());
if (NewV && NewV != &getAssociatedValue()) {
ValueToValueMapTy VMap;
@@ -5672,7 +5671,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation);
if (!SimpleArgOp)
return true;
- if (!SimpleArgOp.getValue())
+ if (!SimpleArgOp.value())
return false;
if (!AA::isDynamicallyUnique(A, *this, **SimpleArgOp))
return false;
@@ -5787,7 +5786,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return true;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
@@ -5796,7 +5795,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return true;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return false;
RHS = *SimplifiedRHS;
@@ -5868,8 +5867,8 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!SimplifiedOp)
return true;
- if (SimplifiedOp.getValue())
- NewOps[Idx] = SimplifiedOp.getValue();
+ if (SimplifiedOp.value())
+ NewOps[Idx] = SimplifiedOp.value();
else
NewOps[Idx] = Op;
@@ -6112,6 +6111,10 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
/// but which is not in the deallocation infos.
bool HasPotentiallyFreeingUnknownUses = false;
+ /// Flag to indicate that we should place the new alloca in the function
+ /// entry block rather than where the call site (CB) is.
+ bool MoveAllocaIntoEntry = true;
+
/// The set of free calls that use this allocation.
SmallSetVector<CallBase *, 1> PotentialFreeCalls{};
};
@@ -6242,17 +6245,6 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
- LoopInfo *LI =
- A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
- Optional<bool> MayContainIrreducibleControl;
- auto IsInLoop = [&](BasicBlock &BB) {
- if (!MayContainIrreducibleControl.has_value())
- MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
- if (MayContainIrreducibleControl.value())
- return true;
- return LI->getLoopFor(&BB) != nullptr;
- };
-
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
@@ -6294,25 +6286,25 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Size = SizeOffsetPair.first;
}
- Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent()))
- ? AI.CB
- : &F->getEntryBlock().front();
+ Instruction *IP =
+ AI.MoveAllocaIntoEntry ? &F->getEntryBlock().front() : AI.CB;
Align Alignment(1);
if (MaybeAlign RetAlign = AI.CB->getRetAlign())
Alignment = std::max(Alignment, *RetAlign);
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
Optional<APInt> AlignmentAPI = getAPInt(A, *this, *Align);
- assert(AlignmentAPI && AlignmentAPI.getValue().getZExtValue() > 0 &&
+ assert(AlignmentAPI && AlignmentAPI.value().getZExtValue() > 0 &&
"Expected an alignment during manifest!");
Alignment = std::max(
- Alignment, assumeAligned(AlignmentAPI.getValue().getZExtValue()));
+ Alignment, assumeAligned(AlignmentAPI.value().getZExtValue()));
}
// TODO: Hoist the alloca towards the function entry.
unsigned AS = DL.getAllocaAddrSpace();
- Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
- Size, Alignment, "", IP);
+ Instruction *Alloca =
+ new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
+ AI.CB->getName() + ".h2s", IP);
if (Alloca->getType() != AI.CB->getType())
Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
@@ -6354,7 +6346,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
A.getAssumedConstant(V, AA, UsedAssumedInformation);
if (!SimpleV)
return APInt(64, 0);
- if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.value()))
return CI->getValue();
return llvm::None;
}
@@ -6400,6 +6392,21 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
bool StackIsAccessibleByOtherThreads =
A.getInfoCache().stackIsAccessibleByOtherThreads();
+ LoopInfo *LI =
+ A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
+ Optional<bool> MayContainIrreducibleControl;
+ auto IsInLoop = [&](BasicBlock &BB) {
+ if (&F->getEntryBlock() == &BB)
+ return false;
+ if (!MayContainIrreducibleControl.has_value())
+ MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
+ if (MayContainIrreducibleControl.value())
+ return true;
+ if (!LI)
+ return true;
+ return LI->getLoopFor(&BB) != nullptr;
+ };
+
// Flag to ensure we update our deallocation information at most once per
// updateImpl call and only if we use the free check reasoning.
bool HasUpdatedFrees = false;
@@ -6617,21 +6624,20 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
- } else {
- if (APAlign->ugt(llvm::Value::MaximumAlignment) ||
- !APAlign->isPowerOf2()) {
- LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign
- << "\n");
- AI.Status = AllocationInfo::INVALID;
- Changed = ChangeStatus::CHANGED;
- continue;
- }
+ }
+ if (APAlign->ugt(llvm::Value::MaximumAlignment) ||
+ !APAlign->isPowerOf2()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign
+ << "\n");
+ AI.Status = AllocationInfo::INVALID;
+ Changed = ChangeStatus::CHANGED;
+ continue;
}
}
+ Optional<APInt> Size = getSize(A, *this, AI);
if (MaxHeapToStackSize != -1) {
- Optional<APInt> Size = getSize(A, *this, AI);
- if (!Size || Size.getValue().ugt(MaxHeapToStackSize)) {
+ if (!Size || Size.value().ugt(MaxHeapToStackSize)) {
LLVM_DEBUG({
if (!Size)
dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n";
@@ -6649,18 +6655,23 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
switch (AI.Status) {
case AllocationInfo::STACK_DUE_TO_USE:
if (UsesCheck(AI))
- continue;
+ break;
AI.Status = AllocationInfo::STACK_DUE_TO_FREE;
LLVM_FALLTHROUGH;
case AllocationInfo::STACK_DUE_TO_FREE:
if (FreeCheck(AI))
- continue;
+ break;
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
- continue;
+ break;
case AllocationInfo::INVALID:
llvm_unreachable("Invalid allocations should never reach this point!");
};
+
+ // Check if we still think we can move it into the entry block.
+ if (AI.MoveAllocaIntoEntry &&
+ (!Size.has_value() || IsInLoop(*AI.CB->getParent())))
+ AI.MoveAllocaIntoEntry = false;
}
return Changed;
@@ -6748,8 +6759,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
- if (CSTy && CSTy.getValue())
- CSTy.getValue()->print(dbgs());
+ if (CSTy && CSTy.value())
+ CSTy.value()->print(dbgs());
else if (CSTy)
dbgs() << "<nullptr>";
else
@@ -6760,8 +6771,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << " : New Type: ";
- if (Ty && Ty.getValue())
- Ty.getValue()->print(dbgs());
+ if (Ty && Ty.value())
+ Ty.value()->print(dbgs());
else if (Ty)
dbgs() << "<nullptr>";
else
@@ -6769,7 +6780,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
dbgs() << "\n";
});
- return !Ty || Ty.getValue();
+ return !Ty || Ty.value();
};
if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
@@ -6783,7 +6794,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
PrivatizableType = identifyPrivatizableType(A);
if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
- if (!PrivatizableType.getValue())
+ if (!PrivatizableType.value())
return indicatePessimisticFixpoint();
// The dependence is optional so we don't give up once we give up on the
@@ -6871,7 +6882,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
if (!CBArgPrivTy)
continue;
- if (CBArgPrivTy.getValue() == PrivatizableType)
+ if (CBArgPrivTy.value() == PrivatizableType)
continue;
}
@@ -6918,7 +6929,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
if (!DCArgPrivTy)
return true;
- if (DCArgPrivTy.getValue() == PrivatizableType)
+ if (DCArgPrivTy.value() == PrivatizableType)
return true;
}
}
@@ -7060,7 +7071,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
ChangeStatus manifest(Attributor &A) override {
if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
- assert(PrivatizableType.getValue() && "Expected privatizable type!");
+ assert(PrivatizableType.value() && "Expected privatizable type!");
// Collect all tail calls in the function as we cannot allow new allocas to
// escape into tail recursion.
@@ -7093,9 +7104,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Instruction *IP = &*EntryBB.getFirstInsertionPt();
const DataLayout &DL = IP->getModule()->getDataLayout();
unsigned AS = DL.getAllocaAddrSpace();
- Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS,
+ Instruction *AI = new AllocaInst(PrivatizableType.value(), AS,
Arg->getName() + ".priv", IP);
- createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
+ createInitialization(PrivatizableType.value(), *AI, ReplacementFn,
ArgIt->getArgNo(), *IP);
if (AI->getType() != Arg->getType())
@@ -7203,7 +7214,7 @@ struct AAPrivatizablePtrCallSiteArgument final
PrivatizableType = identifyPrivatizableType(A);
if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
- if (!PrivatizableType.getValue())
+ if (!PrivatizableType.value())
return indicatePessimisticFixpoint();
const IRPosition &IRP = getIRPosition();
@@ -8664,7 +8675,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return true;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
@@ -8673,7 +8684,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return true;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return false;
RHS = *SimplifiedRHS;
@@ -8717,7 +8728,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedOpV)
return true;
- if (!SimplifiedOpV.getValue())
+ if (!SimplifiedOpV.value())
return false;
OpV = *SimplifiedOpV;
@@ -8747,7 +8758,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return true;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
@@ -8756,7 +8767,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return true;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return false;
RHS = *SimplifiedRHS;
@@ -8821,7 +8832,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedOpV)
return true;
- if (!SimplifiedOpV.getValue())
+ if (!SimplifiedOpV.value())
return false;
Value *VPtr = *SimplifiedOpV;
@@ -9182,7 +9193,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
@@ -9191,7 +9202,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
@@ -9265,7 +9276,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
@@ -9274,7 +9285,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
@@ -9340,7 +9351,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedSrc)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedSrc.getValue())
+ if (!SimplifiedSrc.value())
return indicatePessimisticFixpoint();
Src = *SimplifiedSrc;
@@ -9373,7 +9384,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
@@ -9382,7 +9393,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
@@ -9441,7 +9452,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
UsedAssumedInformation);
if (!SimplifiedIncomingValue)
continue;
- if (!SimplifiedIncomingValue.getValue())
+ if (!SimplifiedIncomingValue.value())
return indicatePessimisticFixpoint();
IncomingValue = *SimplifiedIncomingValue;
@@ -9930,7 +9941,7 @@ private:
const Function &Fn) {
Optional<bool> Cached = isCachedReachable(Fn);
if (Cached)
- return Cached.getValue();
+ return Cached.value();
// The query was not cached, thus it is new. We need to request an update
// explicitly to make sure the information is properly run to a
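
The pattern above — consult the cache and only on a miss request a fresh computation — is ordinary memoization, and the getValue() to value() rename throughout this commit tracks std::optional's spelling. A minimal sketch with std::optional; ReachabilityCache and its query are made up for illustration:

#include <cassert>
#include <map>
#include <optional>
#include <string>

// Memoized boolean query in the spirit of isCachedReachable above:
// an empty optional means "not computed yet", not "false".
class ReachabilityCache {
  std::map<std::string, bool> Cache;

public:
  std::optional<bool> cached(const std::string &Fn) const {
    auto It = Cache.find(Fn);
    if (It == Cache.end())
      return std::nullopt;
    return It->second;
  }

  bool query(const std::string &Fn) {
    if (std::optional<bool> C = cached(Fn))
      return *C; // cache hit, even when the cached answer is false
    bool Result = (Fn != "dead"); // stand-in for the real reachability walk
    Cache.emplace(Fn, Result);
    return Result;
  }
};

int main() {
  ReachabilityCache RC;
  assert(!RC.cached("f").has_value()); // new query
  assert(RC.query("f"));               // computed and cached
  assert(RC.cached("f").has_value());  // served from the cache next time
  assert(!RC.query("dead"));
}
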
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1a1bde4f0668..1ad6e2b2a1d2 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1584,11 +1584,6 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
}
Value *StoredOnceValue = GS.getStoredOnceValue();
if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
- // Avoid speculating constant expressions that might trap (div/rem).
- auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
- if (SOVConstant && SOVConstant->canTrap())
- return Changed;
-
Function &StoreFn =
const_cast<Function &>(*GS.StoredOnceStore->getFunction());
bool CanHaveNonUndefGlobalInitializer =
@@ -1601,6 +1596,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
// This is restricted to address spaces that allow globals to have
// initializers. NVPTX, for example, does not support initializers for
// shared memory (AS 3).
+ auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
DL.getTypeAllocSize(SOVConstant->getType()) ==
DL.getTypeAllocSize(GV->getValueType()) &&
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index d75d99e307fd..28bc43aa1633 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -555,7 +555,7 @@ collectRegionsConstants(OutlinableRegion &Region,
for (Value *V : ID.OperVals) {
Optional<unsigned> GVNOpt = C.getGVN(V);
assert(GVNOpt && "Expected a GVN for operand?");
- unsigned GVN = GVNOpt.getValue();
+ unsigned GVN = GVNOpt.value();
// Check if this global value has been found to not be the same already.
if (NotSame.contains(GVN)) {
@@ -570,7 +570,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// it is considered to not be the same value.
Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
if (ConstantMatches) {
- if (ConstantMatches.getValue())
+ if (ConstantMatches.value())
continue;
else
ConstantsTheSame = false;
@@ -651,7 +651,7 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
// Transfer the swifterr attribute to the correct function parameter.
if (Group.SwiftErrorArgument)
- Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
+ Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.value(),
Attribute::SwiftError);
Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
@@ -809,7 +809,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C,
if (OutputMappings.find(Input) != OutputMappings.end())
Input = OutputMappings.find(Input)->second;
assert(C.getGVN(Input) && "Could not find a numbering for the given input");
- EndInputNumbers.push_back(C.getGVN(Input).getValue());
+ EndInputNumbers.push_back(C.getGVN(Input).value());
}
}
@@ -948,11 +948,11 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
for (unsigned InputVal : InputGVNs) {
Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
assert(CanonicalNumberOpt && "Canonical number not found?");
- unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
+ unsigned CanonicalNumber = CanonicalNumberOpt.value();
Optional<Value *> InputOpt = C.fromGVN(InputVal);
assert(InputOpt && "Global value number not found?");
- Value *Input = InputOpt.getValue();
+ Value *Input = InputOpt.value();
DenseMap<unsigned, unsigned>::iterator AggArgIt =
Group.CanonicalNumberToAggArg.find(CanonicalNumber);
@@ -1236,13 +1236,13 @@ static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
assert(BBGVN && "Could not find GVN for the incoming block!");
- BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
+ BBGVN = Cand.getCanonicalNum(BBGVN.value());
assert(BBGVN && "Could not find canonical number for the incoming block!");
// Create a pair of the exit block canonical value, and the aggregate
// argument location, connected to the canonical numbers stored in the
// PHINode.
PHINodeData TemporaryPair =
- std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs);
+ std::make_pair(std::make_pair(BBGVN.value(), AggArgIdx), PHIGVNs);
hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
// Look for and create a new entry in our connection between canonical
@@ -1516,8 +1516,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// Make sure that the argument in the new function has the SwiftError
// argument.
if (Group.SwiftErrorArgument)
- Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
- Attribute::SwiftError);
+ Call->addParamAttr(Group.SwiftErrorArgument.value(), Attribute::SwiftError);
return Call;
}
@@ -2082,9 +2081,9 @@ static void alignOutputBlockWithAggFunc(
if (MatchingBB) {
LLVM_DEBUG(dbgs() << "Set output block for region in function"
<< Region.ExtractedFunction << " to "
- << MatchingBB.getValue());
+ << MatchingBB.value());
- Region.OutputBlockNum = MatchingBB.getValue();
+ Region.OutputBlockNum = MatchingBB.value();
for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
VtoBB.second->eraseFromParent();
return;
@@ -2679,15 +2678,14 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
if (!OutputIdx)
return;
- if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
- OutputMappings.end()) {
+ if (OutputMappings.find(Outputs[OutputIdx.value()]) == OutputMappings.end()) {
LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
- << *Outputs[OutputIdx.getValue()] << "\n");
- OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
+ << *Outputs[OutputIdx.value()] << "\n");
+ OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.value()]));
} else {
- Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
+ Value *Orig = OutputMappings.find(Outputs[OutputIdx.value()])->second;
LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
- << *Outputs[OutputIdx.getValue()] << "\n");
+ << *Outputs[OutputIdx.value()] << "\n");
OutputMappings.insert(std::make_pair(LI, Orig));
}
}
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 227ad8501f25..8e0ca8c6c997 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3340,6 +3340,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
}
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
+ if (!mayContainParallelRegion())
+ return false;
+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
if (!SPMDCompatibilityTracker.isAssumed()) {
@@ -4428,10 +4431,10 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
if (!SimplifiedValue)
return Str + std::string("none");
- if (!SimplifiedValue.getValue())
+ if (!SimplifiedValue.value())
return Str + std::string("nullptr");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue()))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.value()))
return Str + std::to_string(CI->getSExtValue());
return Str + std::string("unknown");
@@ -4456,7 +4459,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
assert((isValidState() ||
- (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) &&
+ (SimplifiedValue && SimplifiedValue.value() == nullptr)) &&
"Unexpected invalid state!");
if (!isAtFixpoint()) {
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index ae787be40c55..8eef82675e86 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -898,183 +898,6 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createAnnotationRemarksLegacyPass());
}
-void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
- // Load sample profile before running the LTO optimization pipeline.
- if (!PGOSampleUse.empty()) {
- PM.add(createPruneEHPass());
- PM.add(createSampleProfileLoaderPass(PGOSampleUse));
- }
-
- // Remove unused virtual tables to improve the quality of code generated by
- // whole-program devirtualization and bitset lowering.
- PM.add(createGlobalDCEPass());
-
- // Provide AliasAnalysis services for optimizations.
- addInitialAliasAnalysisPasses(PM);
-
- // Allow forcing function attributes as a debugging and tuning aid.
- PM.add(createForceFunctionAttrsLegacyPass());
-
- // Infer attributes about declarations if possible.
- PM.add(createInferFunctionAttrsLegacyPass());
-
- if (OptLevel > 1) {
- // Split call-site with more constrained arguments.
- PM.add(createCallSiteSplittingPass());
-
- // Propagate constant function arguments by specializing the functions.
- if (EnableFunctionSpecialization && OptLevel > 2)
- PM.add(createFunctionSpecializationPass());
-
- // Propagate constants at call sites into the functions they call. This
- // opens opportunities for globalopt (and inlining) by substituting function
- // pointers passed as arguments to direct uses of functions.
- PM.add(createIPSCCPPass());
-
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- PM.add(createCalledValuePropagationPass());
-
- // Infer attributes on declarations, call sites, arguments, etc.
- if (AttributorRun & AttributorRunOption::MODULE)
- PM.add(createAttributorLegacyPass());
- }
-
- // Infer attributes about definitions. The readnone attribute in particular is
- // required for virtual constant propagation.
- PM.add(createPostOrderFunctionAttrsLegacyPass());
- PM.add(createReversePostOrderFunctionAttrsPass());
-
- // Split globals using inrange annotations on GEP indices. This can help
- // improve the quality of generated code when virtual constant propagation or
- // control flow integrity are enabled.
- PM.add(createGlobalSplitPass());
-
- // Apply whole-program devirtualization and virtual constant propagation.
- PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
-
- // That's all we need at opt level 1.
- if (OptLevel == 1)
- return;
-
- // Now that we internalized some globals, see if we can hack on them!
- PM.add(createGlobalOptimizerPass());
- // Promote any localized global vars.
- PM.add(createPromoteMemoryToRegisterPass());
-
- // Linking modules together can lead to duplicated global constants, only
- // keep one copy of each constant.
- PM.add(createConstantMergePass());
-
- // Remove unused arguments from functions.
- PM.add(createDeadArgEliminationPass());
-
- // Reduce the code after globalopt and ipsccp. Both can open up significant
- // simplification opportunities, and both can propagate functions through
- // function pointers. When this happens, we often have to resolve varargs
- // calls, etc, so let instcombine do this.
- if (OptLevel > 2)
- PM.add(createAggressiveInstCombinerPass());
- PM.add(createInstructionCombiningPass());
- addExtensionsToPM(EP_Peephole, PM);
-
- // Inline small functions
- bool RunInliner = Inliner;
- if (RunInliner) {
- PM.add(Inliner);
- Inliner = nullptr;
- }
-
- PM.add(createPruneEHPass()); // Remove dead EH info.
-
- // Infer attributes on declarations, call sites, arguments, etc. for an SCC.
- if (AttributorRun & AttributorRunOption::CGSCC)
- PM.add(createAttributorCGSCCLegacyPass());
-
- // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
- // there are no OpenMP runtime calls present in the module.
- if (OptLevel > 1)
- PM.add(createOpenMPOptCGSCCLegacyPass());
-
- // Optimize globals again if we ran the inliner.
- if (RunInliner)
- PM.add(createGlobalOptimizerPass());
- PM.add(createGlobalDCEPass()); // Remove dead functions.
-
- // The IPO passes may leave cruft around. Clean up after them.
- PM.add(createInstructionCombiningPass());
- addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass());
-
- // Break up allocas
- PM.add(createSROAPass());
-
- // LTO provides additional opportunities for tailcall elimination due to
- // link-time inlining, and visibility of nocapture attribute.
- if (OptLevel > 1)
- PM.add(createTailCallEliminationPass());
-
- // Infer attributes on declarations, call sites, arguments, etc.
- PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
- // Run a few AA driven optimizations here and now, to cleanup the code.
- PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
-
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- PM.add(NewGVN ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
- PM.add(createMemCpyOptPass()); // Remove dead memcpys.
-
- // Nuke dead stores.
- PM.add(createDeadStoreEliminationPass());
- PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
-
- // More loops are countable; try to optimize them.
- if (EnableLoopFlatten)
- PM.add(createLoopFlattenPass());
- PM.add(createIndVarSimplifyPass());
- PM.add(createLoopDeletionPass());
- if (EnableLoopInterchange)
- PM.add(createLoopInterchangePass());
-
- if (EnableConstraintElimination)
- PM.add(createConstraintEliminationPass());
-
- // Unroll small loops and perform peeling.
- PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
- PM.add(createLoopDistributePass());
-
- addVectorPasses(PM, /* IsFullLTO */ true);
-
- addExtensionsToPM(EP_Peephole, PM);
-
- PM.add(createJumpThreadingPass());
-}
-
-void PassManagerBuilder::addLateLTOOptimizationPasses(
- legacy::PassManagerBase &PM) {
- // See comment in the new PM for justification of scheduling splitting at
- // this stage (\ref buildLTODefaultPipeline).
- if (EnableHotColdSplit)
- PM.add(createHotColdSplittingPass());
-
- // Delete basic blocks, which optimization passes may have killed.
- PM.add(
- createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true)));
-
- // Drop bodies of available externally objects to improve GlobalDCE.
- PM.add(createEliminateAvailableExternallyPass());
-
- // Now that we have optimized the program, discard unreachable functions.
- PM.add(createGlobalDCEPass());
-
- // FIXME: this is profitable (for compiler time) to do at -O0 too, but
- // currently it damages debug info.
- if (MergeFunctions)
- PM.add(createMergeFunctionsPass());
-}
-
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 6859953de962..764fd57d245f 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -130,7 +130,7 @@ void ContextTrieNode::addFunctionSize(uint32_t FSize) {
if (!FuncSize)
FuncSize = 0;
- FuncSize = FuncSize.getValue() + FSize;
+ FuncSize = FuncSize.value() + FSize;
}
LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 40de69bbf2cf..55fee213cd5f 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1350,14 +1350,14 @@ SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
- return Cost ? !!Cost.getValue() : false;
+ return Cost ? !!Cost.value() : false;
}
InlineCost
SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
if (Optional<InlineCost> ReplayCost =
getExternalInlineAdvisorCost(*Candidate.CallInstr))
- return ReplayCost.getValue();
+ return ReplayCost.value();
// Adjust threshold based on call site hotness, only do this for callsite
// prioritized inliner because otherwise cost-benefit check is done earlier.
int SampleThreshold = SampleColdCallSiteThreshold;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f4d8b79a5311..535a7736454c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1660,8 +1660,9 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
Constant *MulC;
if (match(&I, m_c_FAdd(m_FMul(m_Value(X), m_ImmConstant(MulC)),
m_Deferred(X)))) {
- MulC = ConstantExpr::getFAdd(MulC, ConstantFP::get(I.getType(), 1.0));
- return BinaryOperator::CreateFMulFMF(X, MulC, &I);
+ if (Constant *NewMulC = ConstantFoldBinaryOpOperands(
+ Instruction::FAdd, MulC, ConstantFP::get(I.getType(), 1.0), DL))
+ return BinaryOperator::CreateFMulFMF(X, NewMulC, &I);
}
if (Value *V = FAddCombine(Builder).simplify(&I))
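
A recurring change in this commit: the ConstantExpr::get* helpers always produced some constant expression, while ConstantFoldBinaryOpOperands may decline and return nullptr, so each fold is now guarded before the transform fires. A standalone sketch of that calling convention, using a toy fold over doubles rather than the LLVM API:

#include <cassert>
#include <cmath>
#include <limits>
#include <optional>

// Toy analogue of ConstantFoldBinaryOpOperands: fold only when the result
// is a well-behaved constant, otherwise decline (the real helper returns
// nullptr instead of an empty optional).
std::optional<double> tryFoldFAdd(double A, double B) {
  double R = A + B;
  if (!std::isfinite(R))
    return std::nullopt;
  return R;
}

// Caller pattern after this patch: transform only when folding succeeded,
// mirroring the (X * C) + X --> X * (C + 1.0) rewrite above.
bool combineMulAddOne(double MulC, double &NewMulC) {
  if (std::optional<double> Folded = tryFoldFAdd(MulC, 1.0)) {
    NewMulC = *Folded;
    return true;
  }
  return false; // keep the original expression untouched
}

int main() {
  double NewC = 0.0;
  assert(combineMulAddOne(2.0, NewC) && NewC == 3.0);
  assert(!combineMulAddOne(std::numeric_limits<double>::infinity(), NewC));
}
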
@@ -1750,6 +1751,52 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
return Builder.CreateIntCast(Result, Ty, true);
}
+static Instruction *foldSubOfMinMax(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Type *Ty = I.getType();
+ auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op1);
+ if (!MinMax)
+ return nullptr;
+
+ // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
+ // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
+ Value *X = MinMax->getLHS();
+ Value *Y = MinMax->getRHS();
+ if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
+ Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
+ return CallInst::Create(F, {X, Y});
+ }
+
+ // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
+ // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
+ Value *Z;
+ if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X))))) {
+ Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Y, Z});
+ return BinaryOperator::CreateAdd(X, USub);
+ }
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X))))) {
+ Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Z, Y});
+ return BinaryOperator::CreateAdd(X, USub);
+ }
+ }
+
+ // sub Op0, smin((sub nsw Op0, Z), 0) --> smax Op0, Z
+ // sub Op0, smax((sub nsw Op0, Z), 0) --> smin Op0, Z
+ if (MinMax->isSigned() && match(Y, m_ZeroInt()) &&
+ match(X, m_NSWSub(m_Specific(Op0), m_Value(Z)))) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
+ Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
+ return CallInst::Create(F, {Op0, Z});
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (Value *V = simplifySubInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
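
The identities encoded in foldSubOfMinMax above hold in modular arithmetic and can be spot-checked exhaustively at a small bit width. A self-contained check on i8; usub_sat is a plain reimplementation of the usub.sat intrinsic's semantics:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Unsigned subtraction clamped at zero, like the usub.sat intrinsic.
uint8_t usub_sat(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned XI = 0; XI < 256; ++XI)
    for (unsigned YI = 0; YI < 256; ++YI) {
      uint8_t X = XI, Y = YI;
      // sub(add(X,Y), umin(X,Y)) --> umax(X,Y):
      assert(uint8_t(uint8_t(X + Y) - std::min(X, Y)) == std::max(X, Y));
      for (unsigned ZI = 0; ZI < 256; ZI += 17) {
        uint8_t Z = ZI;
        // sub(add(X,Y), umin(Y,Z)) --> add(X, usub.sat(Y,Z)):
        assert(uint8_t(uint8_t(X + Y) - std::min(Y, Z)) ==
               uint8_t(X + usub_sat(Y, Z)));
      }
    }
}
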
@@ -1919,14 +1966,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
}
+ // If there's no chance any bit will need to borrow from an adjacent bit:
+ // sub C, X --> xor X, C
const APInt *Op0C;
- if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) {
- // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
- // zero.
- KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
- if ((*Op0C | RHSKnown.Zero).isAllOnes())
- return BinaryOperator::CreateXor(Op1, Op0);
- }
+ if (match(Op0, m_APInt(Op0C)) &&
+ (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C))
+ return BinaryOperator::CreateXor(Op1, Op0);
{
Value *Y;
@@ -2016,36 +2061,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
}
}
- if (auto *II = dyn_cast<MinMaxIntrinsic>(Op1)) {
- {
- // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
- // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
- Value *X = II->getLHS();
- Value *Y = II->getRHS();
- if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
- (Op0->hasOneUse() || Op1->hasOneUse())) {
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
- return replaceInstUsesWith(I, InvMaxMin);
- }
- }
-
- {
- // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
- // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
- Value *X, *Y, *Z;
- if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
- if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X)))))
- return BinaryOperator::CreateAdd(
- X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
- {Y, Z}));
- if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X)))))
- return BinaryOperator::CreateAdd(
- X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
- {Z, Y}));
- }
- }
- }
+ if (Instruction *R = foldSubOfMinMax(I, Builder))
+ return R;
{
// If we have a subtraction between some value and a select between
@@ -2437,13 +2454,15 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
// (X * C) - X --> X * (C - 1.0)
if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
- Constant *CSubOne = ConstantExpr::getFSub(C, ConstantFP::get(Ty, 1.0));
- return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
+ if (Constant *CSubOne = ConstantFoldBinaryOpOperands(
+ Instruction::FSub, C, ConstantFP::get(Ty, 1.0), DL))
+ return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
}
// X - (X * C) --> X * (1.0 - C)
if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
- Constant *OneSubC = ConstantExpr::getFSub(ConstantFP::get(Ty, 1.0), C);
- return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
+ if (Constant *OneSubC = ConstantFoldBinaryOpOperands(
+ Instruction::FSub, ConstantFP::get(Ty, 1.0), C, DL))
+ return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
}
// Reassociate fsub/fadd sequences to create more fadd instructions and
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ae8865651ece..a8f2cd79830a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1771,6 +1771,16 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return new ZExtInst(IsZero, Ty);
}
+ // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y
+ Value *Neg;
+ if (match(&I,
+ m_c_And(m_CombineAnd(m_Value(Neg),
+ m_OneUse(m_Neg(m_And(m_Value(), m_One())))),
+ m_Value(Y)))) {
+ Value *Cmp = Builder.CreateIsNull(Neg);
+ return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
+ }
+
const APInt *C;
if (match(Op1, m_APInt(C))) {
const APInt *XorC;
@@ -1798,7 +1808,8 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
unsigned Width = Ty->getScalarSizeInBits();
const APInt *ShiftC;
- if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) {
+ if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC))))) &&
+ ShiftC->ult(Width)) {
if (*C == APInt::getLowBitsSet(Width, Width - ShiftC->getZExtValue())) {
// We are clearing high bits that were potentially set by sext+ashr:
// and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index 2540e545ae4d..0327efbf9614 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -61,7 +61,13 @@ bool isIdempotentRMW(AtomicRMWInst& RMWI) {
/// equivalent to its value operand.
bool isSaturating(AtomicRMWInst& RMWI) {
if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
- switch(RMWI.getOperation()) {
+ switch (RMWI.getOperation()) {
+ case AtomicRMWInst::FMax:
+ // maxnum(x, +inf) -> +inf
+ return !CF->isNegative() && CF->isInfinity();
+ case AtomicRMWInst::FMin:
+ // minnum(x, -inf) -> -inf
+ return CF->isNegative() && CF->isInfinity();
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
return CF->isNaN();
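
The two new cases rely on fmax with +inf and fmin with -inf being saturating: the stored result no longer depends on the old value, so the RMW degenerates to a plain store of the constant. C's fmax/fmin implement the same maxnum/minnum semantics and make this easy to confirm:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  // fmax/fmin follow maxnum/minnum semantics: NaN loses to a number, and
  // the infinities below absorb every input, so the result is fixed.
  for (double X : {-1.5, 0.0, 42.0, Inf, -Inf, std::nan("")}) {
    assert(std::fmax(X, Inf) == Inf);   // maxnum(x, +inf) -> +inf
    assert(std::fmin(X, -Inf) == -Inf); // minnum(x, -inf) -> -inf
  }
}
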
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 67ef2e895b6c..edfdf70c2b97 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1543,7 +1543,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
!ShAmtC->containsConstantExpression()) {
// Canonicalize a shift amount constant operand to modulo the bit-width.
Constant *WidthC = ConstantInt::get(Ty, BitWidth);
- Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
+ Constant *ModuloC =
+ ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
+ if (!ModuloC)
+ return nullptr;
if (ModuloC != ShAmtC)
return replaceOperand(*II, 2, ModuloC);
@@ -2679,7 +2682,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Handle target specific intrinsics
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
if (V)
- return V.getValue();
+ return V.value();
break;
}
}
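
The canonicalization above reduces a funnel-shift amount modulo the bit width; the patch only changes how that constant is produced (the fold may now fail and return nullptr). The modulo fact itself is easy to sanity-check with an ordinary rotate, sketched here in plain C++:

#include <cassert>
#include <cstdint>

// Rotate-left on i32: only the shift amount modulo 32 matters.
uint32_t rotl32(uint32_t X, uint32_t Amt) {
  Amt %= 32; // the same urem-by-bit-width canonicalization as above
  return Amt == 0 ? X : (X << Amt) | (X >> (32 - Amt));
}

int main() {
  assert(rotl32(0x80000001u, 37) == rotl32(0x80000001u, 5));
  assert(rotl32(0xDEADBEEFu, 32) == 0xDEADBEEFu); // 32 % 32 == 0
}
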
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e9e779b8619b..a9a930555b3c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1756,11 +1756,12 @@ static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
// TODO:
// Try harder to find if the source integer type has less significant bits.
- // For example, compute number of sign bits or compute low bit mask.
+ // For example, compute number of sign bits.
KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
- int LowBits =
- (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
- if (LowBits <= DestNumSigBits)
+ int SigBits = (int)SrcTy->getScalarSizeInBits() -
+ SrcKnown.countMinLeadingZeros() -
+ SrcKnown.countMinTrailingZeros();
+ if (SigBits <= DestNumSigBits)
return true;
return false;
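
The improved test counts the significant bits between the leading and trailing known zeros: if that window fits the destination's significand, the cast is exact even when the value's magnitude alone would not fit. A standalone recheck of the arithmetic in plain C++20, with KnownBits replaced by the exact bits of a sample value:

#include <bit>
#include <cassert>
#include <cstdint>

// Width of the window between leading and trailing zeros, mirroring the
// SigBits computation in the patch.
int significantBits(uint32_t V) {
  if (V == 0)
    return 0;
  return 32 - std::countl_zero(V) - std::countr_zero(V);
}

int main() {
  // 0x00FF0000 spans 24 bits of magnitude but has only 8 significant bits,
  // so a float (24-bit significand) holds it exactly.
  assert(significantBits(0x00FF0000u) == 8);
  assert(uint32_t(float(0x00FF0000u)) == 0x00FF0000u);
  // 0xFF000001 needs a 32-bit window and rounds when cast to float.
  assert(significantBits(0xFF000001u) == 32);
  assert(uint32_t(float(0xFF000001u)) != 0xFF000001u);
}
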
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d1f89973caa1..9f6d36b85522 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1436,7 +1436,7 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
// icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...).
Constant *C = dyn_cast<Constant>(Op1);
- if (!C || C->canTrap())
+ if (!C)
return nullptr;
if (auto *Phi = dyn_cast<PHINode>(Op0))
@@ -1777,11 +1777,16 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
return new ICmpInst(NewPred, X, Zero);
}
+ APInt NewC2 = *C2;
+ KnownBits Know = computeKnownBits(And->getOperand(0), 0, And);
+ // Set high zeros of C2 to allow matching negated power-of-2.
+ NewC2 = *C2 + APInt::getHighBitsSet(C2->getBitWidth(),
+ Know.countMinLeadingZeros());
+
// Restrict this fold only for single-use 'and' (PR10267).
// ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two.
- if ((~(*C2) + 1).isPowerOf2()) {
- Constant *NegBOC =
- ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
+ if (NewC2.isNegatedPowerOf2()) {
+ Constant *NegBOC = ConstantInt::get(And->getType(), -NewC2);
auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
return new ICmpInst(NewPred, X, NegBOC);
}
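
The base identity behind this fold — ((x & C) == 0) is equivalent to (x u< -C) whenever -C is a power of two — can be verified exhaustively at i8; the patch additionally widens C with known-zero high bits of x so that more constants qualify. A minimal standalone check of the base identity:

#include <cassert>
#include <cstdint>

int main() {
  // -C is a power of two exactly when C is a contiguous mask of high bits.
  for (unsigned CI = 0; CI < 256; ++CI) {
    uint8_t NegC = uint8_t(-CI);
    bool IsNegatedPow2 = NegC != 0 && (NegC & (NegC - 1)) == 0;
    if (!IsNegatedPow2)
      continue; // the fold does not apply to this constant
    for (unsigned XI = 0; XI < 256; ++XI) {
      uint8_t X = XI;
      assert(((X & uint8_t(CI)) == 0) == (X < NegC));
    }
  }
}
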
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2a34edbf6cb8..8cb09cbac86f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -505,20 +505,23 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
Constant *C1;
if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
// (C1 / X) * C --> (C * C1) / X
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- if (CC1->isNormalFP())
+ Constant *CC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
+ if (CC1 && CC1->isNormalFP())
return BinaryOperator::CreateFDivFMF(CC1, X, &I);
}
if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
// (X / C1) * C --> X * (C / C1)
- Constant *CDivC1 = ConstantExpr::getFDiv(C, C1);
- if (CDivC1->isNormalFP())
+ Constant *CDivC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
+ if (CDivC1 && CDivC1->isNormalFP())
return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
// If the constant was a denormal, try reassociating differently.
// (X / C1) * C --> X / (C1 / C)
- Constant *C1DivC = ConstantExpr::getFDiv(C1, C);
- if (Op0->hasOneUse() && C1DivC->isNormalFP())
+ Constant *C1DivC =
+ ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
+ if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
}
@@ -527,15 +530,19 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
// further folds and (X * C) + C2 is 'fma'.
if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
// (X + C1) * C --> (X * C) + (C * C1)
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+ if (Constant *CC1 = ConstantFoldBinaryOpOperands(
+ Instruction::FMul, C, C1, DL)) {
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+ }
}
if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
// (C1 - X) * C --> (C * C1) - (X * C)
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ if (Constant *CC1 = ConstantFoldBinaryOpOperands(
+ Instruction::FMul, C, C1, DL)) {
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ }
}
}
@@ -1232,8 +1239,10 @@ static Instruction *foldFDivConstantDivisor(BinaryOperator &I) {
// on all targets.
// TODO: Use Intrinsic::canonicalize or let function attributes tell us that
// denorms are flushed?
- auto *RecipC = ConstantExpr::getFDiv(ConstantFP::get(I.getType(), 1.0), C);
- if (!RecipC->isNormalFP())
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ auto *RecipC = ConstantFoldBinaryOpOperands(
+ Instruction::FDiv, ConstantFP::get(I.getType(), 1.0), C, DL);
+ if (!RecipC || !RecipC->isNormalFP())
return nullptr;
// X / C --> X * (1 / C)
@@ -1256,12 +1265,13 @@ static Instruction *foldFDivConstantDividend(BinaryOperator &I) {
// Try to reassociate C / X expressions where X includes another constant.
Constant *C2, *NewC = nullptr;
+ const DataLayout &DL = I.getModule()->getDataLayout();
if (match(I.getOperand(1), m_FMul(m_Value(X), m_Constant(C2)))) {
// C / (X * C2) --> (C / C2) / X
- NewC = ConstantExpr::getFDiv(C, C2);
+ NewC = ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C2, DL);
} else if (match(I.getOperand(1), m_FDiv(m_Value(X), m_Constant(C2)))) {
// C / (X / C2) --> (C * C2) / X
- NewC = ConstantExpr::getFMul(C, C2);
+ NewC = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C2, DL);
}
// Disallow denormal constants because we don't know what would happen
// on all targets.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9d4c01ac03e2..febd0f51d25f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -925,7 +925,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
*II, DemandedMask, Known, KnownBitsComputed);
if (V)
- return V.getValue();
+ return V.value();
break;
}
}
@@ -1636,7 +1636,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
*II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
simplifyAndSetOp);
if (V)
- return V.getValue();
+ return V.value();
break;
}
} // switch on IntrinsicID
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 22659a8e4951..b80c58183dd5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -228,8 +228,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
// truncate a subset of scalar bits of an insert op.
if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
Value *Scalar;
+ Value *Vec;
uint64_t InsIndexC;
- if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
+ if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
m_ConstantInt(InsIndexC))))
return nullptr;
@@ -239,8 +240,19 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
// of elements 4-7 of the bitcasted vector.
unsigned NarrowingRatio =
NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
- if (ExtIndexC / NarrowingRatio != InsIndexC)
+
+ if (ExtIndexC / NarrowingRatio != InsIndexC) {
+ // Remove the insertelement if we don't use the inserted element:
+ // extractelement (bitcast (insertelement (Vec, b)), a) ->
+ // extractelement (bitcast (Vec), a)
+ // FIXME: this should be moved to SimplifyDemandedVectorElts,
+ // once scalable vectors are supported.
+ if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
+ Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
+ return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
+ }
return nullptr;
+ }
// We are extracting part of the original scalar. How that scalar is
// inserted into the vector depends on the endian-ness. Example:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0816a4a575d9..75520a0c8d5f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -523,11 +523,12 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
// if C1 and C2 are constants.
Value *A, *B;
- Constant *C1, *C2;
+ Constant *C1, *C2, *CRes;
if (Op0 && Op1 &&
Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
- match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
+ match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
+ (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
bool IsNUW = hasNoUnsignedWrap(I) &&
hasNoUnsignedWrap(*Op0) &&
hasNoUnsignedWrap(*Op1);
@@ -544,7 +545,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
InsertNewInstWith(NewBO, I);
NewBO->takeName(Op1);
replaceOperand(I, 0, NewBO);
- replaceOperand(I, 1, ConstantExpr::get(Opcode, C1, C2));
+ replaceOperand(I, 1, CRes);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
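(Sketch: an arithmetic view of the guarded reassociation, using integer add as the operator and made-up constants; illustrative only:)

  #include <cstdint>

  // "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" is only a win when
  // C1 op C2 folds to a single constant, which is what the new
  // ConstantFoldBinaryOpOperands precondition verifies before rewriting.
  uint64_t beforeReassoc(uint64_t A, uint64_t B) { return (A + 3) + (B + 5); }
  uint64_t afterReassoc(uint64_t A, uint64_t B) { return (A + B) + 8; } // 3+5 folded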
@@ -1324,6 +1325,11 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
return nullptr;
+ // Fold constants for the predecessor block with constant incoming values.
+ Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
+ if (!NewC)
+ return nullptr;
+
// Make a new binop in the predecessor block with the non-constant incoming
// values.
Builder.SetInsertPoint(PredBlockBranch);
@@ -1333,9 +1339,6 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
NotFoldedNewBO->copyIRFlags(&BO);
- // Fold constants for the predecessor block with constant incoming values.
- Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1);
-
// Replace the binop with a phi of the new values. The old phis are dead.
PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
NewPhi->addIncoming(NewBO, OtherBB);
@@ -1774,9 +1777,10 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// for target-independent shuffle creation.
if (I >= SrcVecNumElts || ShMask[I] < 0) {
Constant *MaybeUndef =
- ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
- : ConstantExpr::get(Opcode, CElt, UndefScalar);
- if (!match(MaybeUndef, m_Undef())) {
+ ConstOp1
+ ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
+ : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
+ if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
MayChange = false;
break;
}
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 7a5a74aa4fff..4fed4bd18fb1 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -101,6 +101,7 @@ static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44;
static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;
+static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
@@ -476,6 +477,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
TargetTriple.getArch() == Triple::ppc64le;
bool IsSystemZ = TargetTriple.getArch() == Triple::systemz;
bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
+ bool IsMIPSN32ABI = TargetTriple.getEnvironment() == Triple::GNUABIN32;
bool IsMIPS32 = TargetTriple.isMIPS32();
bool IsMIPS64 = TargetTriple.isMIPS64();
bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
@@ -496,6 +498,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
if (LongSize == 32) {
if (IsAndroid)
Mapping.Offset = kDynamicShadowSentinel;
+ else if (IsMIPSN32ABI)
+ Mapping.Offset = kMIPS_ShadowOffsetN32;
else if (IsMIPS32)
Mapping.Offset = kMIPS32_ShadowOffset32;
else if (IsFreeBSD)
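(Sketch: for context, the offset constants above plug into the usual ASan address translation Shadow = (Mem >> Scale) + Offset; a rough standalone sketch that assumes the default shadow scale of 3, which is not shown in this hunk:)

  #include <cstdint>
  #include <cstdio>

  constexpr uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29;
  constexpr unsigned kShadowScale = 3; // one shadow byte covers 8 app bytes

  // Standard ASan mapping from an application address to its shadow byte.
  uint64_t shadowFor(uint64_t Addr) {
    return (Addr >> kShadowScale) + kMIPS_ShadowOffsetN32;
  }

  int main() {
    std::printf("shadow(0x10008000) = 0x%llx\n",
                (unsigned long long)shadowFor(0x10008000ULL));
  }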
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index b11b84d65d23..57c491436b93 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -39,7 +39,8 @@ addModuleFlags(Module &M,
Nodes.push_back(MDNode::get(Context, Vals));
}
- M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
+ M.addModuleFlag(Module::Append, "CG Profile",
+ MDTuple::getDistinct(Context, Nodes));
return true;
}
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 218b4bbfb6c0..b01c74320380 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -180,11 +180,31 @@ static cl::opt<bool> ClWithTls(
"platforms that support this"),
cl::Hidden, cl::init(true));
-static cl::opt<bool>
- ClRecordStackHistory("hwasan-record-stack-history",
- cl::desc("Record stack frames with tagged allocations "
- "in a thread-local ring buffer"),
- cl::Hidden, cl::init(true));
+// Mode for selecting how to insert frame record info into the stack ring
+// buffer.
+enum RecordStackHistoryMode {
+ // Do not record frame record info.
+ none,
+
+ // Insert instructions into the prologue for storing into the stack ring
+ // buffer directly.
+ instr,
+
+ // Add a call to __hwasan_add_frame_record in the runtime.
+ libcall,
+};
+
+static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
+ "hwasan-record-stack-history",
+ cl::desc("Record stack frames with tagged allocations in a thread-local "
+ "ring buffer"),
+ cl::values(clEnumVal(none, "Do not record stack ring history"),
+ clEnumVal(instr, "Insert instructions into the prologue for "
+ "storing into the stack ring buffer directly"),
+ clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
+ "storing into the stack ring buffer")),
+ cl::Hidden, cl::init(instr));
+
static cl::opt<bool>
ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
cl::desc("instrument memory intrinsics"),
@@ -313,6 +333,7 @@ public:
Value *getPC(IRBuilder<> &IRB);
Value *getSP(IRBuilder<> &IRB);
+ Value *getFrameRecordInfo(IRBuilder<> &IRB);
void instrumentPersonalityFunctions();
@@ -378,6 +399,7 @@ private:
FunctionCallee HwasanTagMemoryFunc;
FunctionCallee HwasanGenerateTagFunc;
+ FunctionCallee HwasanRecordFrameRecordFunc;
Constant *ShadowGlobal;
@@ -629,6 +651,9 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
HwasanGenerateTagFunc =
M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
+ HwasanRecordFrameRecordFunc = M.getOrInsertFunction(
+ "__hwasan_add_frame_record", IRB.getVoidTy(), Int64Ty);
+
ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
ArrayType::get(IRB.getInt8Ty(), 0));
@@ -1132,6 +1157,21 @@ Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
return CachedSP;
}
+Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
+ // Prepare ring buffer data.
+ Value *PC = getPC(IRB);
+ Value *SP = getSP(IRB);
+
+ // Mix SP and PC.
+ // Assumptions:
+ // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
+ // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
+ // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
+ // 0xSSSSPPPPPPPPPPPP
+ SP = IRB.CreateShl(SP, 44);
+ return IRB.CreateOr(PC, SP);
+}
+
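(Sketch: the same mixing written as plain integer code, assuming exactly the invariants stated in the comment above:)

  #include <cstdint>

  // PC: at most 48 significant bits; SP: low 4 bits zero. Shifting SP left
  // by 44 places its interesting low bits (SSSS) in the top 16 bits of the
  // record while leaving the PC bits untouched: 0xSSSSPPPPPPPPPPPP.
  uint64_t frameRecord(uint64_t PC, uint64_t SP) {
    return PC | (SP << 44);
  }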
void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
if (!Mapping.InTls)
ShadowBase = getShadowNonTls(IRB);
@@ -1141,50 +1181,67 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
if (!WithFrameRecord && ShadowBase)
return;
- Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
- assert(SlotPtr);
+ Value *SlotPtr = nullptr;
+ Value *ThreadLong = nullptr;
+ Value *ThreadLongMaybeUntagged = nullptr;
- Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
- // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
- Value *ThreadLongMaybeUntagged =
- TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+ auto getThreadLongMaybeUntagged = [&]() {
+ if (!SlotPtr)
+ SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+ if (!ThreadLong)
+ ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
+ // Extract the address field from ThreadLong. Unnecessary on AArch64 with
+ // TBI.
+ return TargetTriple.isAArch64() ? ThreadLong
+ : untagPointer(IRB, ThreadLong);
+ };
if (WithFrameRecord) {
- StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
-
- // Prepare ring buffer data.
- Value *PC = getPC(IRB);
- Value *SP = getSP(IRB);
+ switch (ClRecordStackHistory) {
+ case libcall: {
+ // Emit a runtime call into hwasan rather than emitting instructions for
+ // recording stack history.
+ Value *FrameRecordInfo = getFrameRecordInfo(IRB);
+ IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
+ break;
+ }
+ case instr: {
+ ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
- // Mix SP and PC.
- // Assumptions:
- // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
- // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
- // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
- // 0xSSSSPPPPPPPPPPPP
- SP = IRB.CreateShl(SP, 44);
+ StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
- // Store data to ring buffer.
- Value *RecordPtr =
- IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
- IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
+ // Store data to ring buffer.
+ Value *FrameRecordInfo = getFrameRecordInfo(IRB);
+ Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged,
+ IntptrTy->getPointerTo(0));
+ IRB.CreateStore(FrameRecordInfo, RecordPtr);
- // Update the ring buffer. Top byte of ThreadLong defines the size of the
- // buffer in pages, it must be a power of two, and the start of the buffer
- // must be aligned by twice that much. Therefore wrap around of the ring
- // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
- // The use of AShr instead of LShr is due to
- // https://bugs.llvm.org/show_bug.cgi?id=39030
- // Runtime library makes sure not to use the highest bit.
- Value *WrapMask = IRB.CreateXor(
- IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
- ConstantInt::get(IntptrTy, (uint64_t)-1));
- Value *ThreadLongNew = IRB.CreateAnd(
- IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
- IRB.CreateStore(ThreadLongNew, SlotPtr);
+ // Update the ring buffer. The top byte of ThreadLong defines the size of
+ // the buffer in pages; it must be a power of two, and the start of the
+ // buffer must be aligned by twice that much. Therefore wrap-around of the
+ // ring buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
+ // The use of AShr instead of LShr is due to
+ // https://bugs.llvm.org/show_bug.cgi?id=39030
+ // The runtime library makes sure not to use the highest bit.
+ Value *WrapMask = IRB.CreateXor(
+ IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
+ ConstantInt::get(IntptrTy, (uint64_t)-1));
+ Value *ThreadLongNew = IRB.CreateAnd(
+ IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
+ IRB.CreateStore(ThreadLongNew, SlotPtr);
+ break;
+ }
+ case none: {
+ llvm_unreachable(
+ "A stack history recording mode should've been selected.");
+ }
+ }
}
if (!ShadowBase) {
+ if (!ThreadLongMaybeUntagged)
+ ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
+
// Get shadow base address by aligning RecordPtr up.
// Note: this is not correct if the pointer is already aligned.
// Runtime library will make sure this never happens.
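(Sketch: the ring-buffer advance in the instr case reduces to a couple of integer operations; a standalone sketch, using a plain unsigned shift since it need not work around the AShr issue referenced above:)

  #include <cstdint>

  // The top byte of ThreadLong is the buffer size in pages (a power of two),
  // so after bumping the slot pointer by 8, wrap-around is a single AND.
  uint64_t advanceRecordSlot(uint64_t ThreadLong) {
    uint64_t WrapMask = ~((ThreadLong >> 56) << 12);
    return (ThreadLong + 8) & WrapMask;
  }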
@@ -1408,7 +1465,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F,
Instruction *InsertPt = &*F.getEntryBlock().begin();
IRBuilder<> EntryIRB(InsertPt);
emitPrologue(EntryIRB,
- /*WithFrameRecord*/ ClRecordStackHistory &&
+ /*WithFrameRecord*/ ClRecordStackHistory != none &&
Mapping.WithFrameRecord &&
!SInfo.AllocasToInstrument.empty());
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 7843b1522830..3572cb3b50e2 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1244,6 +1244,7 @@ bool InstrProfiling::emitRuntimeHook() {
auto *Var =
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
+ Var->setVisibility(GlobalValue::HiddenVisibility);
if (TT.isOSBinFormatELF() && !TT.isPS()) {
// Mark the user variable as used so that it isn't stripped out.
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index c33b1b3b1a5c..d4aa31db8337 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -486,7 +486,7 @@ static bool isTsanAtomic(const Instruction *I) {
if (!SSID)
return false;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
- return SSID.getValue() != SyncScope::SingleThread;
+ return SSID.value() != SyncScope::SingleThread;
return true;
}
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 8a1761505d59..fe6f9486ab0c 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -611,9 +611,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
ConstCand->ConstInt->getValue());
if (Diff) {
const InstructionCost ImmCosts =
- TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
+ TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.value(), Ty);
Cost -= ImmCosts;
- LLVM_DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
+ LLVM_DEBUG(dbgs() << "Offset " << Diff.value() << " "
<< "has penalty: " << ImmCosts << "\n"
<< "Adjusted cost: " << Cost << "\n");
}
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 783301fe589e..b460637b7d88 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -748,14 +748,14 @@ void GVNPass::printPipeline(
OS << "<";
if (Options.AllowPRE != None)
- OS << (Options.AllowPRE.getValue() ? "" : "no-") << "pre;";
+ OS << (Options.AllowPRE.value() ? "" : "no-") << "pre;";
if (Options.AllowLoadPRE != None)
- OS << (Options.AllowLoadPRE.getValue() ? "" : "no-") << "load-pre;";
+ OS << (Options.AllowLoadPRE.value() ? "" : "no-") << "load-pre;";
if (Options.AllowLoadPRESplitBackedge != None)
- OS << (Options.AllowLoadPRESplitBackedge.getValue() ? "" : "no-")
+ OS << (Options.AllowLoadPRESplitBackedge.value() ? "" : "no-")
<< "split-backedge-load-pre;";
if (Options.AllowMemDep != None)
- OS << (Options.AllowMemDep.getValue() ? "" : "no-") << "memdep";
+ OS << (Options.AllowMemDep.value() ? "" : "no-") << "memdep";
OS << ">";
}
@@ -1059,8 +1059,8 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
if (DT->dominates(cast<Instruction>(OtherAccess), cast<Instruction>(U)))
OtherAccess = U;
else
- assert(DT->dominates(cast<Instruction>(U),
- cast<Instruction>(OtherAccess)));
+ assert(U == OtherAccess || DT->dominates(cast<Instruction>(U),
+ cast<Instruction>(OtherAccess)));
} else
OtherAccess = U;
}
@@ -1494,14 +1494,6 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- // FIXME: Can we support the fallthrough edge?
- if (isa<CallBrInst>(Pred->getTerminator())) {
- LLVM_DEBUG(
- dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '"
- << Pred->getName() << "': " << *Load << '\n');
- return false;
- }
-
if (LoadBB->isEHPad()) {
LLVM_DEBUG(
dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
@@ -2875,11 +2867,6 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
if (isa<IndirectBrInst>(PREPred->getTerminator()))
return false;
- // Don't do PRE across callbr.
- // FIXME: Can we do this across the fallthrough edge?
- if (isa<CallBrInst>(PREPred->getTerminator()))
- return false;
-
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index e977dd18be9f..a9ca0bdc8f7b 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -106,13 +106,18 @@ static cl::opt<bool> VerifyIndvars(
static cl::opt<ReplaceExitVal> ReplaceExitValue(
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
- cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
- clEnumValN(OnlyCheapRepl, "cheap",
- "only replace exit value when the cost is cheap"),
- clEnumValN(NoHardUse, "noharduse",
- "only replace exit values when loop def likely dead"),
- clEnumValN(AlwaysRepl, "always",
- "always replace exit value whenever possible")));
+ cl::values(
+ clEnumValN(NeverRepl, "never", "never replace exit value"),
+ clEnumValN(OnlyCheapRepl, "cheap",
+ "only replace exit value when the cost is cheap"),
+ clEnumValN(
+ UnusedIndVarInLoop, "unusedindvarinloop",
+ "only replace exit value when it is an unused "
+ "induction variable in the loop and has cheap replacement cost"),
+ clEnumValN(NoHardUse, "noharduse",
+ "only replace exit values when loop def likely dead"),
+ clEnumValN(AlwaysRepl, "always",
+ "always replace exit value whenever possible")));
static cl::opt<bool> UsePostIncrementRanges(
"indvars-post-increment-ranges", cl::Hidden,
@@ -1302,15 +1307,39 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
}
static void replaceLoopPHINodesWithPreheaderValues(
- Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
auto *LoopPreheader = L->getLoopPreheader();
auto *LoopHeader = L->getHeader();
+ SmallVector<Instruction *> Worklist;
for (auto &PN : LoopHeader->phis()) {
auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
+ for (User *U : PN.users())
+ Worklist.push_back(cast<Instruction>(U));
PN.replaceAllUsesWith(PreheaderIncoming);
DeadInsts.emplace_back(&PN);
}
+
+ // Replacing with the preheader value will often allow IV users to simplify
+ // (especially if the preheader value is a constant).
+ SmallPtrSet<Instruction *, 16> Visited;
+ while (!Worklist.empty()) {
+ auto *I = cast<Instruction>(Worklist.pop_back_val());
+ if (!Visited.insert(I).second)
+ continue;
+
+ // Don't simplify instructions outside the loop.
+ if (!L->contains(I))
+ continue;
+
+ Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
+ if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
+ for (User *U : I->users())
+ Worklist.push_back(cast<Instruction>(U));
+ I->replaceAllUsesWith(Res);
+ DeadInsts.emplace_back(I);
+ }
+ }
}
static void replaceWithInvariantCond(
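(Sketch: the propagation loop added above follows a standard worklist shape; reduced to plain data it looks roughly like this generic template, which is illustrative and not pass code:)

  #include <unordered_set>
  #include <vector>

  // Seed the worklist with the direct users of the rewritten PHIs, then keep
  // simplifying transitive users: whenever a rewrite fires, its users become
  // new candidates. Visited guards against reprocessing. T must be hashable.
  template <typename T, typename SimplifyFn, typename UsersFn>
  void propagate(std::vector<T> Worklist, SimplifyFn TrySimplify, UsersFn Users) {
    std::unordered_set<T> Visited;
    while (!Worklist.empty()) {
      T I = Worklist.back();
      Worklist.pop_back();
      if (!Visited.insert(I).second)
        continue;
      if (TrySimplify(I))
        for (T U : Users(I))
          Worklist.push_back(U);
    }
  }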
@@ -1549,14 +1578,19 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (!BI)
return true;
- // If already constant, nothing to do.
- if (isa<Constant>(BI->getCondition()))
- return true;
-
// Likewise, the loop latch must be dominated by the exiting BB.
if (!DT->dominates(ExitingBB, L->getLoopLatch()))
return true;
+ if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // If already constant, nothing to do. However, if this is an
+ // unconditional exit, we can still replace header phis with their
+ // preheader value.
+ if (!L->contains(BI->getSuccessor(CI->isNullValue())))
+ replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts);
+ return true;
+ }
+
return false;
});
@@ -1640,7 +1674,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// the header PHIs with values coming from the preheader.
if (ExitCount->isZero()) {
foldExit(L, ExitingBB, true, DeadInsts);
- replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
+ replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts);
Changed = true;
continue;
}
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 799669a19796..b54cf5e7cb20 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1710,7 +1710,7 @@ IntersectSignedRange(ScalarEvolution &SE,
return None;
if (!R1)
return R2;
- auto &R1Value = R1.getValue();
+ auto &R1Value = R1.value();
// We never return empty ranges from this function, and R1 is supposed to be
// a result of intersection. Thus, R1 is never empty.
assert(!R1Value.isEmpty(SE, /* IsSigned */ true) &&
@@ -1739,7 +1739,7 @@ IntersectUnsignedRange(ScalarEvolution &SE,
return None;
if (!R1)
return R2;
- auto &R1Value = R1.getValue();
+ auto &R1Value = R1.value();
// We never return empty ranges from this function, and R1 is supposed to be
// a result of intersection. Thus, R1 is never empty.
assert(!R1Value.isEmpty(SE, /* IsSigned */ false) &&
@@ -1950,13 +1950,12 @@ bool InductiveRangeCheckElimination::run(
LS.IsSignedPredicate);
if (Result) {
auto MaybeSafeIterRange =
- IntersectRange(SE, SafeIterRange, Result.getValue());
+ IntersectRange(SE, SafeIterRange, Result.value());
if (MaybeSafeIterRange) {
- assert(
- !MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) &&
- "We should never return empty ranges!");
+ assert(!MaybeSafeIterRange.value().isEmpty(SE, LS.IsSignedPredicate) &&
+ "We should never return empty ranges!");
RangeChecksToEliminate.push_back(IRC);
- SafeIterRange = MaybeSafeIterRange.getValue();
+ SafeIterRange = MaybeSafeIterRange.value();
}
}
}
@@ -1964,8 +1963,7 @@ bool InductiveRangeCheckElimination::run(
if (!SafeIterRange)
return false;
- LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT,
- SafeIterRange.getValue());
+ LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, SafeIterRange.value());
bool Changed = LC.run();
if (Changed) {
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 5caefc422921..b31eab50c5ec 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1459,9 +1459,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// Add all the unavailable predecessors to the PredsToSplit list.
for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
- // Same for CallBr.
- if (isa<IndirectBrInst>(P->getTerminator()) ||
- isa<CallBrInst>(P->getTerminator()))
+ if (isa<IndirectBrInst>(P->getTerminator()))
return false;
if (!AvailablePredSet.count(P))
@@ -1685,9 +1683,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
}
// If the predecessor ends with an indirect goto, we can't change its
- // destination. Same for CallBr.
- if (isa<IndirectBrInst>(Pred->getTerminator()) ||
- isa<CallBrInst>(Pred->getTerminator()))
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
PredToDestList.emplace_back(Pred, DestBB);
@@ -1924,10 +1921,9 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
}
// If any of predecessors end with an indirect goto, we can't change its
- // destination. Same for CallBr.
+ // destination.
if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
- return isa<IndirectBrInst>(Pred->getTerminator()) ||
- isa<CallBrInst>(Pred->getTerminator());
+ return isa<IndirectBrInst>(Pred->getTerminator());
}))
return false;
@@ -2173,6 +2169,9 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
BasicBlock *ZeroPred = nullptr;
BasicBlock *OnePred = nullptr;
for (BasicBlock *P : predecessors(PredBB)) {
+ // If PredPred ends with IndirectBrInst, we can't handle it.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ continue;
if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
evaluateOnPredecessorEdge(BB, P, Cond))) {
if (CI->isZero()) {
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 492f4e40395a..f54264b1dca6 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1508,8 +1508,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
for (BasicBlock *BBPred : predecessors(BB)) {
- if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
- isa<CallBrInst>(BBPred->getTerminator()))
+ if (isa<IndirectBrInst>(BBPred->getTerminator()))
return false;
}
return true;
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 03a10cb36bb6..b178bcae3b0e 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -602,7 +602,7 @@ private:
: LLVMLoopDistributeFollowupCoincident});
if (PartitionID) {
Loop *NewLoop = Part->getDistributedLoop();
- NewLoop->setLoopID(PartitionID.getValue());
+ NewLoop->setLoopID(PartitionID.value());
}
}
};
@@ -826,7 +826,7 @@ public:
{LLVMLoopDistributeFollowupAll,
LLVMLoopDistributeFollowupFallback},
"llvm.loop.distribute.", true)
- .getValue();
+ .value();
LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 88d6a7aff3c9..d908c151d9f2 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1483,7 +1483,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// anything where the alignment isn't at least the element size.
assert((StoreAlign && LoadAlign) &&
"Expect unordered load/store to have align.");
- if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize)
+ if (StoreAlign.value() < StoreSize || LoadAlign.value() < StoreSize)
return Changed;
// If the element.atomic memcpy is not lowered into explicit
@@ -1497,7 +1497,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// Note that unordered atomic loads/stores are *required* by the spec to
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
- StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
+ StoreBasePtr, StoreAlign.value(), LoadBasePtr, LoadAlign.value(),
NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope,
AATags.NoAlias);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 9959e408e2e2..4ef7809c6681 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -5601,27 +5601,6 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
DeadInsts.emplace_back(OperandIsInstr);
}
-// Check if there are any loop exit values which are only used once within the
-// loop which may potentially be optimized with a call to rewriteLoopExitValue.
-static bool LoopExitValHasSingleUse(Loop *L) {
- BasicBlock *ExitBB = L->getExitBlock();
- if (!ExitBB)
- return false;
-
- for (PHINode &ExitPhi : ExitBB->phis()) {
- if (ExitPhi.getNumIncomingValues() != 1)
- break;
-
- BasicBlock *Pred = ExitPhi.getIncomingBlock(0);
- Value *IVNext = ExitPhi.getIncomingValueForBlock(Pred);
- // One use would be the exit phi node, and there should be only one other
- // use for this to be considered.
- if (IVNext->getNumUses() == 2)
- return true;
- }
- return false;
-}
-
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
@@ -6406,8 +6385,8 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
// less DWARF ops than an iteration count-based expression.
if (Optional<APInt> Offset =
SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
- if (Offset.getValue().getMinSignedBits() <= 64)
- SalvageExpr->createOffsetExpr(Offset.getValue().getSExtValue(),
+ if (Offset.value().getMinSignedBits() <= 64)
+ SalvageExpr->createOffsetExpr(Offset.value().getSExtValue(),
LSRInductionVar);
} else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
SE))
@@ -6627,12 +6606,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// When this is the case, if the exit value of the IV can be calculated using
// SCEV, we can replace the exit block PHI with the final value of the IV and
// skip the updates in each loop iteration.
- if (L->isRecursivelyLCSSAForm(DT, LI) && LoopExitValHasSingleUse(L)) {
+ if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SCEVExpander Rewriter(SE, DL, "lsr", false);
int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
- OnlyCheapRepl, DeadInsts);
+ UnusedIndVarInLoop, DeadInsts);
if (Rewrites) {
Changed = true;
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 8c2868563227..64fcdfa15aa9 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -373,7 +373,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderInner});
if (NewInnerEpilogueLoopID)
- SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
+ SubLoop->setLoopID(NewInnerEpilogueLoopID.value());
// Find trip count and trip multiple
BasicBlock *Latch = L->getLoopLatch();
@@ -403,14 +403,14 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderOuter});
if (NewOuterEpilogueLoopID)
- EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
+ EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.value());
}
Optional<MDNode *> NewInnerLoopID =
makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupInner});
if (NewInnerLoopID)
- SubLoop->setLoopID(NewInnerLoopID.getValue());
+ SubLoop->setLoopID(NewInnerLoopID.value());
else
SubLoop->setLoopID(OrigSubLoopID);
@@ -419,7 +419,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OrigOuterLoopID,
{LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
if (NewOuterLoopID) {
- L->setLoopID(NewOuterLoopID.getValue());
+ L->setLoopID(NewOuterLoopID.value());
// Do not setLoopAlreadyUnrolled if a followup was given.
return UnrollResult;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fda86afe5f9d..de5833f60adc 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1324,7 +1324,7 @@ static LoopUnrollResult tryToUnrollLoop(
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupRemainder});
if (RemainderLoopID)
- RemainderLoop->setLoopID(RemainderLoopID.getValue());
+ RemainderLoop->setLoopID(RemainderLoopID.value());
}
if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
@@ -1332,7 +1332,7 @@ static LoopUnrollResult tryToUnrollLoop(
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupUnrolled});
if (NewLoopID) {
- L->setLoopID(NewLoopID.getValue());
+ L->setLoopID(NewLoopID.value());
// Do not setLoopAlreadyUnrolled if loop attributes have been specified
// explicitly.
@@ -1645,15 +1645,15 @@ void LoopUnrollPass::printPipeline(
OS, MapClassName2PassName);
OS << "<";
if (UnrollOpts.AllowPartial != None)
- OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
+ OS << (UnrollOpts.AllowPartial.value() ? "" : "no-") << "partial;";
if (UnrollOpts.AllowPeeling != None)
- OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
+ OS << (UnrollOpts.AllowPeeling.value() ? "" : "no-") << "peeling;";
if (UnrollOpts.AllowRuntime != None)
- OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
+ OS << (UnrollOpts.AllowRuntime.value() ? "" : "no-") << "runtime;";
if (UnrollOpts.AllowUpperBound != None)
- OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
+ OS << (UnrollOpts.AllowUpperBound.value() ? "" : "no-") << "upperbound;";
if (UnrollOpts.AllowProfileBasedPeeling != None)
- OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
+ OS << (UnrollOpts.AllowProfileBasedPeeling.value() ? "" : "no-")
<< "profile-peeling;";
if (UnrollOpts.FullUnrollMaxCount != None)
OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index da1737979305..75f0896d4845 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -1929,11 +1930,23 @@ Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
+ const DataLayout &DL = I->getModule()->getDataLayout();
Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
- while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
- Constant *C = cast<Constant>(Ops.pop_back_val().Op);
- Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
+ while (!Ops.empty()) {
+ if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
+ if (!Cst) {
+ Ops.pop_back();
+ Cst = C;
+ continue;
+ }
+ if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
+ Ops.pop_back();
+ Cst = Res;
+ continue;
+ }
+ }
+ break;
}
// If there was nothing but constants then we are done.
if (Ops.empty())
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index e9983ff82176..079b2fc973b9 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -769,8 +769,7 @@ llvm::SplitAllCriticalEdges(Function &F,
unsigned NumBroken = 0;
for (BasicBlock &BB : F) {
Instruction *TI = BB.getTerminator();
- if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI) &&
- !isa<CallBrInst>(TI))
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
++NumBroken;
@@ -1132,9 +1131,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
// all BlockAddress uses would need to be updated.
assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
"Cannot split an edge from an IndirectBrInst");
- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from a CallBrInst");
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+ Preds[i]->getTerminator()->replaceSuccessorWith(BB, NewBB);
}
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 0b36e8708a03..9c595401ce29 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -129,8 +129,7 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
SmallVector<BasicBlock *, 4> LoopPreds;
// Check if extra modifications will be required to preserve loop-simplify
// form after splitting. If it would require splitting blocks with IndirectBr
- // or CallBr terminators, bail out if preserving loop-simplify form is
- // requested.
+ // terminators, bail out if preserving loop-simplify form is requested.
if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
@@ -156,10 +155,7 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
// Loop-simplify form can be preserved, if we can split all in-loop
// predecessors.
if (any_of(LoopPreds, [](BasicBlock *Pred) {
- const Instruction *T = Pred->getTerminator();
- if (const auto *CBR = dyn_cast<CallBrInst>(T))
- return CBR->getDefaultDest() != Pred;
- return isa<IndirectBrInst>(T);
+ return isa<IndirectBrInst>(Pred->getTerminator());
})) {
if (Options.PreserveLoopSimplify)
return nullptr;
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index f94d854f7ee8..421f1f329f07 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -927,6 +927,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::AlwaysInline:
case Attribute::Cold:
case Attribute::DisableSanitizerInstrumentation:
+ case Attribute::FnRetThunkExtern:
case Attribute::Hot:
case Attribute::NoRecurse:
case Attribute::InlineHint:
@@ -1777,7 +1778,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
if (Count)
newFunction->setEntryCount(
- ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
+ ProfileCount(Count.value(), Function::PCT_Real)); // FIXME
BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 205f7a7d9ed2..24126b5ab67b 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -961,8 +961,13 @@ createDebugifyFunctionPass(enum DebugifyMode Mode,
}
PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
- applyDebugifyMetadata(M, M.functions(),
- "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
+ else
+ collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
+ "ModuleDebugify (original debuginfo)",
+ NameOfWrappedPass);
return PreservedAnalyses::all();
}
@@ -992,8 +997,14 @@ FunctionPass *createCheckDebugifyFunctionPass(
PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
ModuleAnalysisManager &) {
- checkDebugifyMetadata(M, M.functions(), "", "CheckModuleDebugify", false,
- nullptr);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+ "CheckModuleDebugify", Strip, StatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
+ OrigDIVerifyBugsReportFilePath);
return PreservedAnalyses::all();
}
@@ -1006,13 +1017,15 @@ static bool isIgnoredPass(StringRef PassID) {
void DebugifyEachInstrumentation::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
- PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) {
+ PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) {
if (isIgnoredPass(P))
return;
if (any_isa<const Function *>(IR))
- applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)));
+ applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)),
+ Mode, DebugInfoBeforePass, P);
else if (any_isa<const Module *>(IR))
- applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)));
+ applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)),
+ Mode, DebugInfoBeforePass, P);
});
PIC.registerAfterPassCallback([this](StringRef P, Any IR,
const PreservedAnalyses &PassPA) {
@@ -1022,12 +1035,24 @@ void DebugifyEachInstrumentation::registerCallbacks(
auto &F = *const_cast<Function *>(any_cast<const Function *>(IR));
Module &M = *F.getParent();
auto It = F.getIterator();
- checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
- "CheckFunctionDebugify", /*Strip=*/true, &StatsMap);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
+ "CheckFunctionDebugify", /*Strip=*/true, DIStatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, make_range(It, std::next(It)), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
} else if (any_isa<const Module *>(IR)) {
auto &M = *const_cast<Module *>(any_cast<const Module *>(IR));
- checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
- /*Strip=*/true, &StatsMap);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
+ /*Strip=*/true, DIStatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
}
});
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index cd3b6c1a095a..023a0afd329b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -402,7 +402,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
Optional<MDNode *> NewLoopID = makeFollowupLoopID(
LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
if (NewLoopID) {
- NewLoop->setLoopID(NewLoopID.getValue());
+ NewLoop->setLoopID(NewLoopID.value());
// Do not setLoopAlreadyUnrolled if loop attributes have been defined
// explicitly.
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index ec898c463574..82f993b4ceab 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -75,9 +75,6 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
if (isa<IndirectBrInst>(PredBB->getTerminator()))
// We cannot rewrite exiting edges from an indirectbr.
return false;
- if (isa<CallBrInst>(PredBB->getTerminator()))
- // We cannot rewrite exiting edges from a callbr.
- return false;
InLoopPredecessors.push_back(PredBB);
} else {
@@ -359,7 +356,7 @@ TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
Optional<int> Count =
getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
if (Count)
- return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+ return Count.value() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
return TM_ForcedByUser;
@@ -380,7 +377,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
Optional<int> Count =
getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
if (Count)
- return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+ return Count.value() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
return TM_ForcedByUser;
@@ -1246,6 +1243,20 @@ static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet)
return true;
}
+/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
+/// and returns true if this Phi is an induction phi in the loop. When
+/// isInductionPHI returns true, \p ID will also be set by isInductionPHI.
+static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
+ InductionDescriptor &ID) {
+ if (!Phi)
+ return false;
+ if (!L->getLoopPreheader())
+ return false;
+ if (Phi->getParent() != L->getHeader())
+ return false;
+ return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
+}
+
int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
ScalarEvolution *SE,
const TargetTransformInfo *TTI,
@@ -1297,6 +1308,46 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
if (!L->contains(Inst))
continue;
+ // Find exit values which are induction variables in the loop and are
+ // otherwise unused in the loop, i.e. whose only uses are the exit block
+ // PHI node and the induction variable update binary operator.
+ // The exit value can be replaced with the final value when it is cheap
+ // to do so.
+ if (ReplaceExitValue == UnusedIndVarInLoop) {
+ InductionDescriptor ID;
+ PHINode *IndPhi = dyn_cast<PHINode>(Inst);
+ if (IndPhi) {
+ if (!checkIsIndPhi(IndPhi, L, SE, ID))
+ continue;
+ // This is an induction PHI. Check that the only users are PHI
+ // nodes and induction variable update binary operators.
+ if (llvm::any_of(Inst->users(), [&](User *U) {
+ if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
+ return true;
+ BinaryOperator *B = dyn_cast<BinaryOperator>(U);
+ if (B && B != ID.getInductionBinOp())
+ return true;
+ return false;
+ }))
+ continue;
+ } else {
+ // If it is not an induction phi, it must be an induction update
+ // binary operator with an induction phi user.
+ BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
+ if (!B)
+ continue;
+ if (llvm::any_of(Inst->users(), [&](User *U) {
+ PHINode *Phi = dyn_cast<PHINode>(U);
+ if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
+ return true;
+ return false;
+ }))
+ continue;
+ if (B != ID.getInductionBinOp())
+ continue;
+ }
+ }
+
// Okay, this instruction has a user outside of the current loop
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible. We prefer to start with
@@ -1362,7 +1413,9 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
// Only do the rewrite when the ExitValue can be expanded cheaply.
// If LoopCanBeDel is true, rewrite exit value aggressively.
- if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost)
+ if ((ReplaceExitValue == OnlyCheapRepl ||
+ ReplaceExitValue == UnusedIndVarInLoop) &&
+ !LoopCanBeDel && Phi.HighCost)
continue;
Value *ExitVal = Rewriter.expandCodeFor(
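(Sketch: source-level intuition for the new UnusedIndVarInLoop mode; a hand-written example, not taken from the patch:)

  // Inside the loop, i's only users are its own PHI and the increment, so
  // the exit use qualifies and can be rewritten to the closed-form value.
  unsigned countUp(unsigned N) {
    unsigned i = 0;
    while (i < N)
      ++i;
    return i; // exit value; replaceable with N when expansion is cheap
  }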
diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
index 8641581c8039..9914a5ca6c5e 100644
--- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -74,6 +74,10 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
return Builder.CreateFAdd(Loaded, Inc, "new");
case AtomicRMWInst::FSub:
return Builder.CreateFSub(Loaded, Inc, "new");
+ case AtomicRMWInst::FMax:
+ return Builder.CreateMaxNum(Loaded, Inc);
+ case AtomicRMWInst::FMin:
+ return Builder.CreateMinNum(Loaded, Inc);
default:
llvm_unreachable("Unknown atomic op");
}
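(Sketch: a semantics note for the two new cases; llvm.maxnum/llvm.minnum follow IEEE-754 maxNum/minNum, matching C's fmax/fmin quiet-NaN behavior, so one expansion step behaves like:)

  #include <cmath>

  // One iteration of the expanded atomicrmw loop computes the new value from
  // the loaded value and the operand; a quiet NaN input yields the other side.
  double fmaxStep(double Loaded, double Inc) { return std::fmax(Loaded, Inc); }
  double fminStep(double Loaded, double Inc) { return std::fmin(Loaded, Inc); }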
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp
index b73d68ebec7c..4414b04c7264 100644
--- a/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ b/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -221,7 +221,7 @@ void checkBackendInstrumentation(Instruction &I,
auto ExpectedWeightsOpt = extractWeights(&I, I.getContext());
if (!ExpectedWeightsOpt)
return;
- auto ExpectedWeights = ExpectedWeightsOpt.getValue();
+ auto ExpectedWeights = ExpectedWeightsOpt.value();
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
@@ -230,7 +230,7 @@ void checkFrontendInstrumentation(Instruction &I,
auto RealWeightsOpt = extractWeights(&I, I.getContext());
if (!RealWeightsOpt)
return;
- auto RealWeights = RealWeightsOpt.getValue();
+ auto RealWeights = RealWeightsOpt.value();
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 5120ade70e16..9e1492b97a86 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -255,7 +255,7 @@ void VFABI::setVectorVariantNames(CallInst *CI,
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
assert(VI && "Cannot add an invalid VFABI name.");
- assert(M->getNamedValue(VI.getValue().VectorName) &&
+ assert(M->getNamedValue(VI.value().VectorName) &&
"Cannot add variant to attribute: "
"vector function declaration is missing.");
}
@@ -275,5 +275,13 @@ void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
GV->setSection(SectionName);
GV->setAlignment(Alignment);
+ LLVMContext &Ctx = M.getContext();
+ NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
+ Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
+ MDString::get(Ctx, SectionName)};
+
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+ GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
+
appendToCompilerUsed(M, GV);
}
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index aff692b36288..bec1db896efb 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -488,31 +488,33 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
StoresByIndex,
std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
less_first());
+ Value *ReplVal;
if (I == StoresByIndex.begin()) {
if (StoresByIndex.empty())
// If there are no stores, the load takes the undef value.
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ ReplVal = UndefValue::get(LI->getType());
else
// There is no store before this load, bail out (load may be affected
// by the following stores - see main comment).
return false;
} else {
- // Otherwise, there was a store before this load, the load takes its value.
- // Note, if the load was marked as nonnull we don't want to lose that
- // information when we erase it. So we preserve it with an assume.
- Value *ReplVal = std::prev(I)->second->getOperand(0);
- if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
- addAssumeNonNull(AC, LI);
+ // Otherwise, there was a store before this load, so the load takes
+ // its value.
+ ReplVal = std::prev(I)->second->getOperand(0);
+ }
- // If the replacement value is the load, this must occur in unreachable
- // code.
- if (ReplVal == LI)
- ReplVal = PoisonValue::get(LI->getType());
+ // Note, if the load was marked as nonnull we don't want to lose that
+ // information when we erase it. So we preserve it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
+ addAssumeNonNull(AC, LI);
- LI->replaceAllUsesWith(ReplVal);
- }
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = PoisonValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index eee91e70292e..09a83f1ea094 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -208,8 +208,6 @@ private:
if (!Elt)
LV.markOverdefined(); // Unknown sort of constant.
- else if (isa<UndefValue>(Elt))
- ; // Undef values remain unknown.
else
LV.markConstant(Elt); // Constants are constant.
}
@@ -356,8 +354,7 @@ public:
// We only track the contents of scalar globals.
if (GV->getValueType()->isSingleValueType()) {
ValueLatticeElement &IV = TrackedGlobals[GV];
- if (!isa<UndefValue>(GV->getInitializer()))
- IV.markConstant(GV->getInitializer());
+ IV.markConstant(GV->getInitializer());
}
}
@@ -822,9 +819,6 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (Constant *OpC = getConstant(OpSt)) {
// Fold the constant as we build.
Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
- if (isa<UndefValue>(C))
- return;
- // Propagate constant value
markConstant(&I, C);
} else if (I.getDestTy()->isIntegerTy()) {
auto &LV = getValueState(&I);
@@ -959,19 +953,15 @@ void SCCPInstVisitor::visitUnaryOperator(Instruction &I) {
if (isOverdefined(IV))
return (void)markOverdefined(&I);
- if (isConstant(V0State)) {
- Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V0State));
-
- // op Y -> undef.
- if (isa<UndefValue>(C))
- return;
- return (void)markConstant(IV, &I, C);
- }
-
- // If something is undef, wait for it to resolve.
- if (!isOverdefined(V0State))
+ // If something is unknown/undef, wait for it to resolve.
+ if (V0State.isUnknownOrUndef())
return;
+ if (isConstant(V0State))
+ if (Constant *C = ConstantFoldUnaryOpOperand(I.getOpcode(),
+ getConstant(V0State), DL))
+ return (void)markConstant(IV, &I, C);
+
markOverdefined(&I);
}
@@ -999,9 +989,6 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
auto *C = dyn_cast_or_null<Constant>(R);
if (C) {
- // X op Y -> undef.
- if (isa<UndefValue>(C))
- return;
// Conservatively assume that the result may be based on operands that may
// be undef. Note that we use mergeInValue to combine the constant with
// the existing lattice value for I, as different constants might be found
@@ -1050,6 +1037,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) {
Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State);
if (C) {
+ // TODO: getCompare() currently has incorrect handling for unknown/undef.
if (isa<UndefValue>(C))
return;
ValueLatticeElement CV;
@@ -1095,8 +1083,6 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
Constant *C =
ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
- if (isa<UndefValue>(C))
- return;
markConstant(&I, C);
}
@@ -1174,11 +1160,8 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) {
- if (isa<UndefValue>(C))
- return;
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL))
return (void)markConstant(IV, &I, C);
- }
}
// Fall back to metadata.
@@ -1223,12 +1206,8 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F))) {
- // call -> undef.
- if (isa<UndefValue>(C))
- return;
+ if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F)))
return (void)markConstant(&CB, C);
- }
}
// Fall back to metadata.
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 401f1ee5a55d..0c8bf3827256 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -220,7 +220,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// Fold a binop with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantExpr::get(Opcode, CLHS, CRHS);
+ if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
+ return Res;
// Do a quick scan to see if we have this binop nearby. If so, reuse it.
unsigned ScanLimit = 6;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 567b866f7777..4b5ade99767b 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -377,18 +377,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
/// expensive.
static InstructionCost computeSpeculationCost(const User *I,
const TargetTransformInfo &TTI) {
- assert(isSafeToSpeculativelyExecute(I) &&
+ assert((!isa<Instruction>(I) ||
+ isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
"Instruction is not safe to speculatively execute!");
return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
}
-/// Check whether this is a potentially trapping constant.
-static bool canTrap(const Value *V) {
- if (auto *C = dyn_cast<Constant>(V))
- return C->canTrap();
- return false;
-}
-
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -421,9 +415,9 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- // Non-instructions all dominate instructions, but not all constantexprs
- // can be executed unconditionally.
- return !canTrap(V);
+ // Non-instructions dominate all instructions and can be executed
+ // unconditionally.
+ return true;
}
BasicBlock *PBB = I->getParent();
@@ -1473,10 +1467,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
while (isa<DbgInfoIntrinsic>(I2))
I2 = &*BB2_Itr++;
}
- // FIXME: Can we define a safety predicate for CallBr?
- if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
- (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
- isa<CallBrInst>(I1))
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2))
return false;
BasicBlock *BIParent = BI->getParent();
@@ -1609,11 +1600,6 @@ HoistTerminator:
if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
passingValueIsAlwaysUndefined(BB2V, &PN))
return Changed;
-
- if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
- return Changed;
- if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
- return Changed;
}
}
@@ -2679,9 +2665,6 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
passingValueIsAlwaysUndefined(ThenV, &PN))
return false;
- if (canTrap(OrigV) || canTrap(ThenV))
- return false;
-
HaveRewritablePHIs = true;
ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
@@ -2979,10 +2962,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
return true;
}
-static ConstantInt *
-getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To,
- SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>,
- ConstantInt *> &Visited) {
+static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
+ BasicBlock *To) {
// Don't look past the block defining the value, we might get the value from
// a previous loop iteration.
auto *I = dyn_cast<Instruction>(V);
@@ -2996,23 +2977,7 @@ getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To,
return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
: ConstantInt::getFalse(BI->getContext());
- // Limit the amount of blocks we inspect.
- if (Visited.size() >= 8)
- return nullptr;
-
- auto Pair = Visited.try_emplace({From, To}, nullptr);
- if (!Pair.second)
- return Pair.first->second;
-
- // Check whether the known value is the same for all predecessors.
- ConstantInt *Common = nullptr;
- for (BasicBlock *Pred : predecessors(From)) {
- ConstantInt *C = getKnownValueOnEdge(V, Pred, From, Visited);
- if (!C || (Common && Common != C))
- return nullptr;
- Common = C;
- }
- return Visited[{From, To}] = Common;
+ return nullptr;
}
/// If we have a conditional branch on something for which we know the constant
@@ -3022,7 +2987,7 @@ static Optional<bool>
FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
const DataLayout &DL,
AssumptionCache *AC) {
- SmallMapVector<BasicBlock *, ConstantInt *, 8> KnownValues;
+ SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
BasicBlock *BB = BI->getParent();
Value *Cond = BI->getCondition();
PHINode *PN = dyn_cast<PHINode>(Cond);
@@ -3035,12 +3000,11 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
for (Use &U : PN->incoming_values())
if (auto *CB = dyn_cast<ConstantInt>(U))
- KnownValues.insert({PN->getIncomingBlock(U), CB});
+ KnownValues[CB].insert(PN->getIncomingBlock(U));
} else {
- SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, ConstantInt *> Visited;
for (BasicBlock *Pred : predecessors(BB)) {
- if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB, Visited))
- KnownValues.insert({Pred, CB});
+ if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
+ KnownValues[CB].insert(Pred);
}
}
@@ -3056,29 +3020,34 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
for (const auto &Pair : KnownValues) {
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
- ConstantInt *CB = Pair.second;
- BasicBlock *PredBB = Pair.first;
+ ConstantInt *CB = Pair.first;
+ ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
if (RealDest == BB)
continue; // Skip self loops.
+
// Skip if the predecessor's terminator is an indirect branch.
- if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ if (any_of(PredBBs, [](BasicBlock *PredBB) {
+ return isa<IndirectBrInst>(PredBB->getTerminator());
+ }))
continue;
- SmallVector<DominatorTree::UpdateType, 3> Updates;
+ LLVM_DEBUG({
+ dbgs() << "Condition " << *Cond << " in " << BB->getName()
+ << " has value " << *Pair.first << " in predecessors:\n";
+ for (const BasicBlock *PredBB : Pair.second)
+ dbgs() << " " << PredBB->getName() << "\n";
+ dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
+ });
+
+ // Split the predecessors we are threading into a new edge block. We'll
+ // clone the instructions into this block, and then redirect it to RealDest.
+ BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
- // The dest block might have PHI nodes, other predecessors and other
- // difficult cases. Instead of being smart about this, just insert a new
- // block that jumps to the destination block, effectively splitting
- // the edge we are about to create.
- BasicBlock *EdgeBB =
- BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
- RealDest->getParent(), RealDest);
- BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
- if (DTU)
- Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
- CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
+ // TODO: These just exist to reduce test diff, we can drop them if we like.
+ EdgeBB->setName(RealDest->getName() + ".critedge");
+ EdgeBB->moveBefore(RealDest);
// Update PHI nodes.
AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -3086,12 +3055,12 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
// BB may have instructions that are being threaded over. Clone these
// instructions into EdgeBB. We know that there will be no uses of the
// cloned instructions outside of EdgeBB.
- BasicBlock::iterator InsertPt = EdgeBB->begin();
+ BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
DenseMap<Value *, Value *> TranslateMap; // Track translated values.
- TranslateMap[Cond] = Pair.second;
+ TranslateMap[Cond] = CB;
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
continue;
}
// Clone the instruction.
@@ -3129,19 +3098,15 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
}
}
- // Loop over all of the edges from PredBB to BB, changing them to branch
- // to EdgeBB instead.
- Instruction *PredBBTI = PredBB->getTerminator();
- for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
- if (PredBBTI->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
- PredBBTI->setSuccessor(i, EdgeBB);
- }
+ BB->removePredecessor(EdgeBB);
+ BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
+ EdgeBI->setSuccessor(0, RealDest);
+ EdgeBI->setDebugLoc(BI->getDebugLoc());
if (DTU) {
- Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
- Updates.push_back({DominatorTree::Delete, PredBB, BB});
-
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
+ Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
DTU->applyUpdates(Updates);
}
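// A hedged source-level sketch (hypothetical function, not from the patch)
// of what the reworked threading above does: predecessors known to feed the
// same constant condition are grouped per value, split into one shared
// ".critedge" block via SplitBlockPredecessors, and that block is then
// redirected straight to the successor the constant selects.
int threadExample(int Kind) {
  bool Cond;
  switch (Kind) {
  case 0: Cond = true;  break;   // pred A: Cond known true
  case 1: Cond = true;  break;   // pred B: Cond known true
  default: Cond = false; break;  // pred C: Cond known false
  }
  // Preds A and B share one edge block and branch directly to the
  // 'return 1' successor; pred C threads to 'return 0'.
  return Cond ? 1 : 0;
}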
@@ -3599,13 +3564,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
- // Cond is known to be a compare or binary operator. Check to make sure that
- // neither operand is a potentially-trapping constant expression.
- if (canTrap(Cond->getOperand(0)))
- return false;
- if (canTrap(Cond->getOperand(1)))
- return false;
-
// Finally, don't infinitely unroll conditional loops.
if (is_contained(successors(BB), BB))
return false;
@@ -4113,9 +4071,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
return true;
- if (canTrap(BI->getCondition()))
- return false;
-
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
@@ -4157,10 +4112,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
- // Also do not perform this transformation if any phi node in the common
- // destination block can trap when reached by BB or PBB (PR17073). In that
- // case, it would be unsafe to hoist the operation into a select instruction.
-
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
unsigned NumPhis = 0;
@@ -4168,16 +4119,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
++II, ++NumPhis) {
if (NumPhis > 2) // Disable this xform.
return false;
-
- PHINode *PN = cast<PHINode>(II);
- Value *BIV = PN->getIncomingValueForBlock(BB);
- if (canTrap(BIV))
- return false;
-
- unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
- Value *PBIV = PN->getIncomingValue(PBBIdx);
- if (canTrap(PBIV))
- return false;
}
// Finally, if everything is ok, fold the branches to logical ops.
@@ -6174,6 +6115,23 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
return isSwitchDense(SI->getNumCases(), TableSize);
}
+static bool ShouldUseSwitchConditionAsTableIndex(
+ ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
+ bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
+ const DataLayout &DL, const TargetTransformInfo &TTI) {
+ if (MinCaseVal.isNullValue())
+ return true;
+ if (MinCaseVal.isNegative() ||
+ MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
+ !HasDefaultResults)
+ return false;
+ return all_of(ResultTypes, [&](const auto &KV) {
+ return SwitchLookupTable::WouldFitInRegister(
+ DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
+ KV.second /* ResultType */);
+ });
+}
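// Hedged illustration of the heuristic added above (hypothetical example):
// for case values {2, 3, 4} the old code always built a 3-entry table
// indexed by (x - 2). When a default result exists and every result type
// still fits in a register at the larger size, the condition may now index
// a 5-entry table whose slots 0 and 1 hold the default result, trading a
// slightly larger table for the removed subtraction.
int tableExample(unsigned X) {
  switch (X) {
  case 2: return 10;
  case 3: return 20;
  case 4: return 30;
  default: return 0; // fills holes 0 and 1 of the widened table
  }
}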
+
/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
@@ -6329,9 +6287,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
uint64_t NumResults = ResultLists[PHIs[0]].size();
- APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
- uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- bool TableHasHoles = (NumResults < TableSize);
// If the table has holes, we need a constant result for the default case
// or a bitmask that fits in a register.
@@ -6340,6 +6295,22 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
DefaultResultsList, DL, TTI);
+ for (const auto &I : DefaultResultsList) {
+ PHINode *PHI = I.first;
+ Constant *Result = I.second;
+ DefaultResults[PHI] = Result;
+ }
+
+ bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
+ *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
+ uint64_t TableSize;
+ if (UseSwitchConditionAsTableIndex)
+ TableSize = MaxCaseVal->getLimitedValue() + 1;
+ else
+ TableSize =
+ (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+
+ bool TableHasHoles = (NumResults < TableSize);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
@@ -6349,12 +6320,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
return false;
}
- for (const auto &I : DefaultResultsList) {
- PHINode *PHI = I.first;
- Constant *Result = I.second;
- DefaultResults[PHI] = Result;
- }
-
if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
return false;
@@ -6368,11 +6333,15 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Compute the table index value.
Builder.SetInsertPoint(SI);
Value *TableIndex;
- if (MinCaseVal->isNullValue())
+ ConstantInt *TableIndexOffset;
+ if (UseSwitchConditionAsTableIndex) {
+ TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
TableIndex = SI->getCondition();
- else
- TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
- "switch.tableidx");
+ } else {
+ TableIndexOffset = MinCaseVal;
+    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
+                                   "switch.tableidx");
+ }
// Compute the maximum table size representable by the integer type we are
// switching upon.
@@ -6424,7 +6393,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Build bitmask; fill in a 1 bit for every case.
const ResultListTy &ResultList = ResultLists[PHIs[0]];
for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
- uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
+ uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
.getLimitedValue();
MaskInt |= One << Idx;
}
@@ -6463,8 +6432,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If using a bitmask, use any value to fill the lookup table holes.
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
StringRef FuncName = Fn->getName();
- SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
- FuncName);
+ SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
+ DL, FuncName);
Value *Result = Table.BuildLookup(TableIndex, Builder);
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index dbef1ff2e739..af15e0c31b75 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -79,21 +79,23 @@ namespace {
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
+ bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
bool eliminateTrunc(TruncInst *TI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
- bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
- void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
- void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
+ void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
+ void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
bool IsSigned);
void replaceRemWithNumerator(BinaryOperator *Rem);
void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
void replaceSRemWithURem(BinaryOperator *Rem);
bool eliminateSDiv(BinaryOperator *SDiv);
- bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
- bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
+ bool strengthenOverflowingOperation(BinaryOperator *OBO,
+ Instruction *IVOperand);
+ bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
};
}
@@ -192,7 +194,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
}
bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
- Value *IVOperand) {
+ Instruction *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
if (IVOperand != ICmp->getOperand(0)) {
@@ -261,7 +263,8 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
-void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
+ Instruction *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
ICmpInst::Predicate OriginalPred = Pred;
@@ -372,7 +375,8 @@ void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
/// SimplifyIVUsers helper for eliminating useless remainder operations
/// operating on an induction variable or replacing srem by urem.
-void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
+ Instruction *IVOperand,
bool IsSigned) {
auto *NValue = Rem->getOperand(0);
auto *DValue = Rem->getOperand(1);
@@ -673,6 +677,35 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
return true;
}
+/// Eliminate redundant type cast between integer and float.
+bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
+ if (UseInst->getOpcode() != CastInst::SIToFP)
+ return false;
+
+ Value *IVOperand = UseInst->getOperand(0);
+ // Get the symbolic expression for this instruction.
+ ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand));
+ unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+ if (IVRange.getActiveBits() <= DestNumSigBits) {
+ for (User *U : UseInst->users()) {
+ // Match for fptosi of sitofp and with same type.
+ auto *CI = dyn_cast<FPToSIInst>(U);
+ if (!CI || IVOperand->getType() != CI->getType())
+ continue;
+
+ CI->replaceAllUsesWith(IVOperand);
+ DeadInsts.push_back(CI);
+ LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
+ << " with: " << *IVOperand << '\n');
+
+ ++NumFoldedUser;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
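// Minimal source pattern targeted by replaceFloatIVWithIntegerIV above
// (hypothetical example): an integer IV is converted to float and
// immediately back at the same type, and the round-trip is exact because
// the IV's signed range fits in the float's mantissa bits.
int sumIndices(int N) {
  int Sum = 0;
  for (int I = 0; I < N && I < 1000; ++I) {
    float F = static_cast<float>(I); // sitofp
    int J = static_cast<int>(F);     // fptosi of the sitofp, same type
    Sum += J;                        // after the fold: Sum += I;
  }
  return Sum;
}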
+
/// Eliminate any operation that SCEV can prove is an identity function.
bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
Instruction *IVOperand) {
@@ -718,18 +751,16 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
/// unsigned-overflow. Returns true if anything changed, false otherwise.
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
- Value *IVOperand) {
- SCEV::NoWrapFlags Flags;
- bool Deduced;
- std::tie(Flags, Deduced) = SE->getStrengthenedNoWrapFlagsFromBinOp(
+ Instruction *IVOperand) {
+ auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
cast<OverflowingBinaryOperator>(BO));
- if (!Deduced)
- return Deduced;
+ if (!Flags)
+ return false;
- BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNUW) ==
+ BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) ==
SCEV::FlagNUW);
- BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNSW) ==
+ BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) ==
SCEV::FlagNSW);
// The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
@@ -737,14 +768,14 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
// forgetValue() here to make sure those flags also propagate to any other
// SCEV expressions based on the addrec. However, this can have pathological
// compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
- return Deduced;
+ return true;
}
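// Hedged sketch of the API shape consumed above:
// getStrengthenedNoWrapFlagsFromBinOp() used to return a (Flags, Deduced)
// pair and now returns an optional, so "nothing deduced" is the empty
// state rather than a separate bool (std::optional used for illustration).
#include <optional>
static std::optional<unsigned> deduceFlags(bool CanDeduce, unsigned Flags) {
  if (!CanDeduce)
    return std::nullopt; // previously {Flags, /*Deduced=*/false}
  return Flags;          // previously {Flags, /*Deduced=*/true}
}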
/// Annotate the Shr in (X << IVOperand) >> C as exact using the
/// information from the IV's range. Returns true if anything changed, false
/// otherwise.
bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
- Value *IVOperand) {
+ Instruction *IVOperand) {
using namespace llvm::PatternMatch;
if (BO->getOpcode() == Instruction::Shl) {
@@ -896,6 +927,13 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
}
}
+ // Try to use integer induction for FPToSI of float induction directly.
+ if (replaceFloatIVWithIntegerIV(UseInst)) {
+ // Re-queue the potentially new direct uses of IVOperand.
+ pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
+ continue;
+ }
+
CastInst *Cast = dyn_cast<CastInst>(UseInst);
if (V && Cast) {
V->visitCast(Cast);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index f4306bb43dfd..b359717424a6 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -75,7 +75,8 @@ static bool callHasFP128Argument(const CallInst *CI) {
});
}
-static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
+static Value *convertStrToNumber(CallInst *CI, StringRef &Str, Value *EndPtr,
+ int64_t Base, IRBuilderBase &B) {
if (Base < 2 || Base > 36)
// handle special zero base
if (Base != 0)
@@ -97,6 +98,15 @@ static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result))
return nullptr;
+ if (EndPtr) {
+ // Store the pointer to the end.
+ uint64_t ILen = End - nptr.c_str();
+ Value *Off = B.getInt64(ILen);
+ Value *StrBeg = CI->getArgOperand(0);
+ Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr");
+ B.CreateStore(StrEnd, EndPtr);
+ }
+
return ConstantInt::get(CI->getType(), Result);
}
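// Hedged sketch of what the extended convertStrToNumber preserves
// (hypothetical caller): when the folded call was passed a usable end
// pointer, the simplifier now also stores the address one past the last
// consumed digit instead of bailing out.
#include <cstdlib>
long strtolDemo() {
  const char *S = "42abc";
  char *End;
  long V = strtol(S, &End, 10); // folds: V = 42, End = S + 2
  return V + (End - S);         // folds to 44
}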
@@ -295,31 +305,69 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
}
+// Helper to transform memchr(S, C, N) == S to N && *S == C and, when
+// NBytes is null, strchr(S, C) to *S == C. A precondition of the function
+// is that either S is dereferenceable or the value of N is nonzero.
+static Value *memChrToCharCompare(CallInst *CI, Value *NBytes,
+                                  IRBuilderBase &B, const DataLayout &DL) {
+ Value *Src = CI->getArgOperand(0);
+ Value *CharVal = CI->getArgOperand(1);
+
+ // Fold memchr(A, C, N) == A to N && *A == C.
+ Type *CharTy = B.getInt8Ty();
+ Value *Char0 = B.CreateLoad(CharTy, Src);
+ CharVal = B.CreateTrunc(CharVal, CharTy);
+ Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
+
+ if (NBytes) {
+ Value *Zero = ConstantInt::get(NBytes->getType(), 0);
+ Value *And = B.CreateICmpNE(NBytes, Zero);
+ Cmp = B.CreateLogicalAnd(And, Cmp);
+ }
+
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+ return B.CreateSelect(Cmp, Src, NullPtr);
+}
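// Source-level meaning of the fold (hypothetical wrapper): when the memchr
// result is only compared for equality against S, the call collapses to a
// length test plus one character load; for strchr the length test is
// dropped because S is known to be dereferenceable.
#include <cstring>
bool firstCharIs(const char *S, int C, std::size_t N) {
  // Before the fold: return memchr(S, C, N) == S;
  return N != 0 && *S == static_cast<char>(C);
}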
+
Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
Value *SrcStr = CI->getArgOperand(0);
+ Value *CharVal = CI->getArgOperand(1);
annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ return memChrToCharCompare(CI, nullptr, B, DL);
+
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
if (!CharC) {
uint64_t Len = GetStringLength(SrcStr);
if (Len)
annotateDereferenceableBytes(CI, 0, Len);
else
return nullptr;
+
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
return copyFlags(
*CI,
- emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ emitMemChr(SrcStr, CharVal, // include nul.
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B,
DL, TLI));
}
+ if (CharC->isZero()) {
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+ if (isOnlyUsedInEqualityComparison(CI, NullPtr))
+ // Pre-empt the transformation to strlen below and fold
+ // strchr(A, '\0') == null to false.
+ return B.CreateIntToPtr(B.getTrue(), CI->getType());
+ }
+
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
@@ -1008,8 +1056,12 @@ Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
Value *Size = CI->getArgOperand(2);
- if (isKnownNonZero(Size, DL))
+
+ if (isKnownNonZero(Size, DL)) {
annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ return memChrToCharCompare(CI, Size, B, DL);
+ }
Value *CharVal = CI->getArgOperand(1);
ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
@@ -1099,9 +1151,16 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
}
- if (!LenC)
+ if (!LenC) {
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ // S is dereferenceable so it's safe to load from it and fold
+ // memchr(S, C, N) == S to N && *S == C for any C and N.
+      // TODO: This is safe even for a nonconstant S.
+ return memChrToCharCompare(CI, Size, B, DL);
+
// From now on we need a constant length and constant array.
return nullptr;
+ }
// If the char is variable but the input str and length are not we can turn
// this memchr call into a simple bit field test. Of course this only works
@@ -1589,31 +1648,6 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
return nullptr;
}
-static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) {
- // Multiplications calculated using Addition Chains.
- // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
-
- assert(Exp != 0 && "Incorrect exponent 0 not handled");
-
- if (InnerChain[Exp])
- return InnerChain[Exp];
-
- static const unsigned AddChain[33][2] = {
- {0, 0}, // Unused.
- {0, 0}, // Unused (base case = pow1).
- {1, 1}, // Unused (pre-computed).
- {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
- {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
- {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
- {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
- {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
- };
-
- InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
- getPow(InnerChain, AddChain[Exp][1], B));
- return InnerChain[Exp];
-}
-
// Return a properly extended integer (DstWidth bits wide) if the operation is
// an itofp.
static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
@@ -1914,70 +1948,52 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
- // pow(x, n) -> x * x * x * ...
+ // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
const APFloat *ExpoF;
- if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
- !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
- // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
- // If the exponent is an integer+0.5 we generate a call to sqrt and an
- // additional fmul.
- // TODO: This whole transformation should be backend specific (e.g. some
- // backends might prefer libcalls or the limit for the exponent might
- // be different) and it should also consider optimizing for size.
- APFloat LimF(ExpoF->getSemantics(), 33),
- ExpoA(abs(*ExpoF));
- if (ExpoA < LimF) {
- // This transformation applies to integer or integer+0.5 exponents only.
- // For integer+0.5, we create a sqrt(Base) call.
- Value *Sqrt = nullptr;
- if (!ExpoA.isInteger()) {
- APFloat Expo2 = ExpoA;
- // To check if ExpoA is an integer + 0.5, we add it to itself. If there
- // is no floating point exception and the result is an integer, then
- // ExpoA == integer + 0.5
- if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
- return nullptr;
-
- if (!Expo2.isInteger())
- return nullptr;
-
- Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
- Pow->doesNotAccessMemory(), M, B, TLI);
- if (!Sqrt)
- return nullptr;
- }
-
- // We will memoize intermediate products of the Addition Chain.
- Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Base;
- InnerChain[2] = B.CreateFMul(Base, Base, "square");
-
- // We cannot readily convert a non-double type (like float) to a double.
- // So we first convert it to something which could be converted to double.
- ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
- Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+ if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) &&
+ !ExpoF->isExactlyValue(-0.5)) {
+ APFloat ExpoA(abs(*ExpoF));
+ APFloat ExpoI(*ExpoF);
+ Value *Sqrt = nullptr;
+ if (AllowApprox && !ExpoA.isInteger()) {
+ APFloat Expo2 = ExpoA;
+ // To check if ExpoA is an integer + 0.5, we add it to itself. If there
+ // is no floating point exception and the result is an integer, then
+ // ExpoA == integer + 0.5
+ if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+ return nullptr;
- // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
- if (Sqrt)
- FMul = B.CreateFMul(FMul, Sqrt);
+ if (!Expo2.isInteger())
+ return nullptr;
- // If the exponent is negative, then get the reciprocal.
- if (ExpoF->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
+ if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) !=
+ APFloat::opInexact)
+ return nullptr;
+ if (!ExpoI.isInteger())
+ return nullptr;
+ ExpoF = &ExpoI;
- return FMul;
+ Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+ Pow->doesNotAccessMemory(), M, B, TLI);
+ if (!Sqrt)
+ return nullptr;
}
+ // pow(x, n) -> powi(x, n) if n is a constant signed integer value
APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
- // powf(x, n) -> powi(x, n) if n is a constant signed integer value
if (ExpoF->isInteger() &&
ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
APFloat::opOK) {
- return copyFlags(
+ Value *PowI = copyFlags(
*Pow,
createPowWithIntegerExponent(
Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
M, B));
+
+ if (PowI && Sqrt)
+ return B.CreateFMul(PowI, Sqrt);
+
+ return PowI;
}
}
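// Hedged sketch of the strengthened fold: under approximate-math flags a
// constant exponent with an exact 0.5 fraction, e.g. 3.5, now becomes powi
// plus a single sqrt instead of the old chain of multiplies (GCC/Clang
// builtins used here purely for illustration).
double powBefore(double X) { return __builtin_pow(X, 3.5); }
double powAfter(double X) {
  return __builtin_powi(X, 3) * __builtin_sqrt(X);
}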
@@ -2517,7 +2533,7 @@ Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
- return convertStrToNumber(CI, Str, 10);
+ return convertStrToNumber(CI, Str, nullptr, 10, B);
}
Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) {
@@ -2525,11 +2541,14 @@ Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) {
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
- if (!isa<ConstantPointerNull>(CI->getArgOperand(1)))
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr))
+ EndPtr = nullptr;
+ else if (!isKnownNonZero(EndPtr, DL))
return nullptr;
if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
- return convertStrToNumber(CI, Str, CInt->getSExtValue());
+ return convertStrToNumber(CI, Str, EndPtr, CInt->getSExtValue(), B);
}
return nullptr;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 6242d9a93fc1..183ba86abcb4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -386,20 +386,6 @@ static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
return true;
}
-/// Check whether it is safe to if-convert this phi node.
-///
-/// Phi nodes with constant expressions that can trap are not safe to if
-/// convert.
-static bool canIfConvertPHINodes(BasicBlock *BB) {
- for (PHINode &Phi : BB->phis()) {
- for (Value *V : Phi.incoming_values())
- if (auto *C = dyn_cast<Constant>(V))
- if (C->canTrap())
- return false;
- }
- return true;
-}
-
static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
if (Ty->isPointerTy())
return DL.getIntPtrType(Ty);
@@ -993,7 +979,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
- Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
PSE.addPredicate(LAI->getPSE().getPredicate());
return true;
}
@@ -1098,13 +1083,6 @@ bool LoopVectorizationLegality::blockCanBePredicated(
SmallPtrSetImpl<const Instruction *> &MaskedOp,
SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
for (Instruction &I : *BB) {
- // Check that we don't have a constant expression that can trap as operand.
- for (Value *Operand : I.operands()) {
- if (auto *C = dyn_cast<Constant>(Operand))
- if (C->canTrap())
- return false;
- }
-
// We can predicate blocks with calls to assume, as long as we drop them in
// case we flatten the CFG via predication.
if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
@@ -1190,7 +1168,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
}
// Collect the blocks that need predication.
- BasicBlock *Header = TheLoop->getHeader();
for (BasicBlock *BB : TheLoop->blocks()) {
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator())) {
@@ -1212,13 +1189,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
BB->getTerminator());
return false;
}
- } else if (BB != Header && !canIfConvertPHINodes(BB)) {
- reportVectorizationFailure(
- "Control flow cannot be substituted for a select",
- "control flow cannot be substituted for a select",
- "NoCFGForSelect", ORE, TheLoop,
- BB->getTerminator());
- return false;
}
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 0cb2032fa45a..2e9a9fe0640e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -33,7 +33,6 @@ class LoopInfo;
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class PredicatedScalarEvolution;
-class LoopVectorizationRequirements;
class LoopVectorizeHints;
class OptimizationRemarkEmitter;
class TargetTransformInfo;
@@ -46,8 +45,9 @@ class VPBuilder {
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
VPInstruction *createInstruction(unsigned Opcode,
- ArrayRef<VPValue *> Operands, DebugLoc DL) {
- VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL);
+ ArrayRef<VPValue *> Operands, DebugLoc DL,
+ const Twine &Name = "") {
+ VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL, Name);
if (BB)
BB->insert(Instr, InsertPt);
return Instr;
@@ -55,8 +55,8 @@ class VPBuilder {
VPInstruction *createInstruction(unsigned Opcode,
std::initializer_list<VPValue *> Operands,
- DebugLoc DL) {
- return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL);
+ DebugLoc DL, const Twine &Name = "") {
+ return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
}
public:
@@ -124,34 +124,37 @@ public:
/// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
/// its underlying Instruction.
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
- Instruction *Inst = nullptr) {
+ Instruction *Inst = nullptr, const Twine &Name = "") {
DebugLoc DL;
if (Inst)
DL = Inst->getDebugLoc();
- VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL);
+ VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
NewVPInst->setUnderlyingValue(Inst);
return NewVPInst;
}
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
- DebugLoc DL) {
- return createInstruction(Opcode, Operands, DL);
+ DebugLoc DL, const Twine &Name = "") {
+ return createInstruction(Opcode, Operands, DL, Name);
}
- VPValue *createNot(VPValue *Operand, DebugLoc DL) {
- return createInstruction(VPInstruction::Not, {Operand}, DL);
+ VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") {
+ return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
}
- VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL) {
- return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL);
+ VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL,
+ const Twine &Name = "") {
+ return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
}
- VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL) {
- return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL);
+ VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL,
+ const Twine &Name = "") {
+ return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
}
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
- DebugLoc DL) {
- return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL);
+ DebugLoc DL, const Twine &Name = "") {
+ return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,
+ Name);
}
//===--------------------------------------------------------------------===//
@@ -191,6 +194,10 @@ struct VectorizationFactor {
/// Cost of the scalar loop.
InstructionCost ScalarCost;
+ /// The minimum trip count required to make vectorization profitable, e.g. due
+ /// to runtime checks.
+ ElementCount MinProfitableTripCount;
+
VectorizationFactor(ElementCount Width, InstructionCost Cost,
InstructionCost ScalarCost)
: Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
@@ -268,8 +275,6 @@ class LoopVectorizationPlanner {
const LoopVectorizeHints &Hints;
- LoopVectorizationRequirements &Requirements;
-
OptimizationRemarkEmitter *ORE;
SmallVector<VPlanPtr, 4> VPlans;
@@ -285,10 +290,9 @@ public:
InterleavedAccessInfo &IAI,
PredicatedScalarEvolution &PSE,
const LoopVectorizeHints &Hints,
- LoopVectorizationRequirements &Requirements,
OptimizationRemarkEmitter *ORE)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
- PSE(PSE), Hints(Hints), Requirements(Requirements), ORE(ORE) {}
+ PSE(PSE), Hints(Hints), ORE(ORE) {}
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
@@ -332,11 +336,6 @@ public:
bool requiresTooManyRuntimeChecks() const;
protected:
- /// Collect the instructions from the original loop that would be trivially
- /// dead in the vectorized loop if generated.
- void collectTriviallyDeadInstructions(
- SmallPtrSetImpl<Instruction *> &DeadInstructions);
-
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
/// legal to vectorize the loop.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b637b2d5ddae..0777a1385916 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -196,10 +196,9 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold(
"value are vectorized only if no scalar iteration overheads "
"are incurred."));
-static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
- "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
- cl::desc("The maximum allowed number of runtime memory checks with a "
- "vectorize(enable) pragma."));
+static cl::opt<unsigned> VectorizeMemoryCheckThreshold(
+ "vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum allowed number of runtime memory checks"));
// Option prefer-predicate-over-epilogue indicates that an epilogue is undesired,
// that predication is preferred, and this lists all options. I.e., the
@@ -442,6 +441,7 @@ public:
const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, ElementCount VecWidth,
+ ElementCount MinProfitableTripCount,
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
@@ -453,6 +453,11 @@ public:
// of the original loop header may change as the transformation happens.
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
OrigLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
+
+ if (MinProfitableTripCount.isZero())
+ this->MinProfitableTripCount = VecWidth;
+ else
+ this->MinProfitableTripCount = MinProfitableTripCount;
}
virtual ~InnerLoopVectorizer() = default;
@@ -656,6 +661,8 @@ protected:
/// vector elements.
ElementCount VF;
+ ElementCount MinProfitableTripCount;
+
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
unsigned UF;
@@ -735,6 +742,7 @@ public:
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, GeneratedRTChecks &Check)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
+ ElementCount::getFixed(1),
ElementCount::getFixed(1), UnrollFactor, LVL, CM,
BFI, PSI, Check) {}
@@ -783,8 +791,8 @@ public:
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI,
- Checks),
+ EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
+ CM, BFI, PSI, Checks),
EPI(EPI) {}
// Override this function to handle the more complex control flow around the
@@ -1018,7 +1026,8 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
isa<VPInterleaveRecipe>(CurRec) ||
isa<VPScalarIVStepsRecipe>(CurRec) ||
- isa<VPCanonicalIVPHIRecipe>(CurRec))
+ isa<VPCanonicalIVPHIRecipe>(CurRec) ||
+ isa<VPActiveLaneMaskPHIRecipe>(CurRec))
continue;
// This recipe contributes to the address computation of a widen
@@ -1503,6 +1512,13 @@ public:
/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const { return FoldTailByMasking; }
+  /// Returns true if we're tail-folding and want to use the active lane mask
+ /// for vector loop control flow.
+ bool useActiveLaneMaskForControlFlow() const {
+ return FoldTailByMasking &&
+ TTI.emitGetActiveLaneMask() == PredicationStyle::DataAndControlFlow;
+ }
+
/// Returns true if the instructions in this block requires predication
/// for any reason, e.g. because tail folding now requires a predicate
/// or because the block in the original loop was predicated.
@@ -1551,14 +1567,14 @@ public:
Scalars.clear();
}
-private:
- unsigned NumPredStores = 0;
-
/// Convenience function that returns the value of vscale_range iff
/// vscale_range.min == vscale_range.max or otherwise returns the value
/// returned by the corresponding TLI method.
Optional<unsigned> getVScaleForTuning() const;
+private:
+ unsigned NumPredStores = 0;
+
/// \return An upper bound for the vectorization factors for both
/// fixed and scalable vectorization, where the minimum-known number of
/// elements is a power-of-2 larger than zero. If scalable vectorization is
@@ -1661,7 +1677,8 @@ private:
/// A set containing all BasicBlocks that are known to present after
/// vectorization as a predicated block.
- SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
+ DenseMap<ElementCount, SmallPtrSet<BasicBlock *, 4>>
+ PredicatedBBsAfterVectorization;
/// Records whether it is allowed to have the original scalar loop execute at
/// least once. This may be needed as a fallback loop in case runtime
@@ -1849,14 +1866,17 @@ class GeneratedRTChecks {
DominatorTree *DT;
LoopInfo *LI;
+ TargetTransformInfo *TTI;
SCEVExpander SCEVExp;
SCEVExpander MemCheckExp;
+ bool CostTooHigh = false;
+
public:
GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
- const DataLayout &DL)
- : DT(DT), LI(LI), SCEVExp(SE, DL, "scev.check"),
+ TargetTransformInfo *TTI, const DataLayout &DL)
+ : DT(DT), LI(LI), TTI(TTI), SCEVExp(SE, DL, "scev.check"),
MemCheckExp(SE, DL, "scev.check") {}
/// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
@@ -1867,6 +1887,15 @@ public:
void Create(Loop *L, const LoopAccessInfo &LAI,
const SCEVPredicate &UnionPred, ElementCount VF, unsigned IC) {
+ // Hard cutoff to limit compile-time increase in case a very large number of
+ // runtime checks needs to be generated.
+ // TODO: Skip cutoff if the loop is guaranteed to execute, e.g. due to
+ // profile info.
+ CostTooHigh =
+ LAI.getNumRuntimePointerChecks() > VectorizeMemoryCheckThreshold;
+ if (CostTooHigh)
+ return;
+
BasicBlock *LoopHeader = L->getHeader();
BasicBlock *Preheader = L->getLoopPreheader();
@@ -1938,6 +1967,44 @@ public:
}
}
+ InstructionCost getCost() {
+ if (SCEVCheckBlock || MemCheckBlock)
+ LLVM_DEBUG(dbgs() << "Calculating cost of runtime checks:\n");
+
+ if (CostTooHigh) {
+ InstructionCost Cost;
+ Cost.setInvalid();
+ LLVM_DEBUG(dbgs() << " number of checks exceeded threshold\n");
+ return Cost;
+ }
+
+ InstructionCost RTCheckCost = 0;
+ if (SCEVCheckBlock)
+ for (Instruction &I : *SCEVCheckBlock) {
+ if (SCEVCheckBlock->getTerminator() == &I)
+ continue;
+ InstructionCost C =
+ TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
+ LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
+ RTCheckCost += C;
+ }
+ if (MemCheckBlock)
+ for (Instruction &I : *MemCheckBlock) {
+ if (MemCheckBlock->getTerminator() == &I)
+ continue;
+ InstructionCost C =
+ TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
+ LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
+ RTCheckCost += C;
+ }
+
+ if (SCEVCheckBlock || MemCheckBlock)
+ LLVM_DEBUG(dbgs() << "Total cost of runtime checks: " << RTCheckCost
+ << "\n");
+
+ return RTCheckCost;
+ }
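// Back-of-the-envelope use of this cost (numbers assumed; the policy that
// consumes it lives outside this hunk): dividing the one-off check cost by
// the per-iteration saving yields the break-even trip count that feeds
// MinProfitableTripCount.
static unsigned breakEvenTripCount(unsigned RTCheckCost,
                                   unsigned PerIterSaving) {
  // e.g. 24 / 3 == 8: below 8 iterations the checks cost more than
  // vectorization recovers.
  return PerIterSaving ? (RTCheckCost + PerIterSaving - 1) / PerIterSaving
                       : ~0u;
}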
+
/// Remove the created SCEV & memory runtime check blocks & instructions, if
/// unused.
~GeneratedRTChecks() {
@@ -2880,9 +2947,16 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// If tail is to be folded, vector loop takes care of all iterations.
Type *CountTy = Count->getType();
Value *CheckMinIters = Builder.getFalse();
- Value *Step = createStepForVF(Builder, CountTy, VF, UF);
+ auto CreateStep = [&]() {
+ // Create step with max(MinProTripCount, UF * VF).
+    // Create the step as max(MinProfitableTripCount, UF * VF).
+ return createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
+ return createStepForVF(Builder, CountTy, VF, UF);
+ };
+
if (!Cost->foldTailByMasking())
- CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
+ CheckMinIters =
+ Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
else if (VF.isScalable()) {
// vscale is not necessarily a power-of-2, which means we cannot guarantee
// an overflow to zero when updating induction variables and so an
@@ -2894,8 +2968,9 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
// Don't execute the vector loop if (UMax - n) < (VF * UF).
- CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
+ CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
}
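// Worked example of CreateStep() above (numbers assumed): with VF = 4 and
// UF = 2 the plain step is 8, but if the runtime checks only pay off from
// 16 iterations, MinProfitableTripCount = 16 wins, so trip counts below 16
// branch to the scalar loop.
#include <algorithm>
static unsigned minItersStep(unsigned VF, unsigned UF,
                             unsigned MinProfitableTC) {
  return std::max(UF * VF, MinProfitableTC); // max(2 * 4, 16) == 16
}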
+
// Create new preheader for vector loop.
LoopVectorPreHeader =
SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr,
@@ -2920,7 +2995,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
-
BasicBlock *const SCEVCheckBlock =
RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock);
if (!SCEVCheckBlock)
@@ -4792,7 +4866,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
MaxVScale =
TheFunction->getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
MaxScalableVF = ElementCount::getScalable(
- MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
+ MaxVScale ? (MaxSafeElements / MaxVScale.value()) : 0);
if (!MaxScalableVF)
reportVectorizationInfo(
"Max legal vector width too small, scalable vectorization "
@@ -5187,9 +5261,9 @@ bool LoopVectorizationCostModel::isMoreProfitable(
unsigned EstimatedWidthB = B.Width.getKnownMinValue();
if (Optional<unsigned> VScale = getVScaleForTuning()) {
if (A.Width.isScalable())
- EstimatedWidthA *= VScale.getValue();
+ EstimatedWidthA *= VScale.value();
if (B.Width.isScalable())
- EstimatedWidthB *= VScale.getValue();
+ EstimatedWidthB *= VScale.value();
}
// Assume vscale may be larger than 1 (or the value being tuned for),
@@ -5872,10 +5946,11 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
- auto GetRegUsage = [&TTI = TTI](Type *Ty, ElementCount VF) -> unsigned {
+ const auto &TTICapture = TTI;
+ auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty))
return 0;
- return TTI.getRegUsageForType(VectorType::get(Ty, VF));
+ return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
};
for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
@@ -6014,6 +6089,8 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// map will indicate that we've analyzed it already.
ScalarCostsTy &ScalarCostsVF = InstsToScalarize[VF];
+ PredicatedBBsAfterVectorization[VF].clear();
+
// Find all the instructions that are scalar with predication in the loop and
// determine if it would be better to not if-convert the blocks they are in.
// If so, we also record the instructions to scalarize.
@@ -6031,7 +6108,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
// Remember that BB will remain after vectorization.
- PredicatedBBsAfterVectorization.insert(BB);
+ PredicatedBBsAfterVectorization[VF].insert(BB);
}
}
}
@@ -6896,8 +6973,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
bool ScalarPredicatedBB = false;
BranchInst *BI = cast<BranchInst>(I);
if (VF.isVector() && BI->isConditional() &&
- (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
- PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
+ (PredicatedBBsAfterVectorization[VF].count(BI->getSuccessor(0)) ||
+ PredicatedBBsAfterVectorization[VF].count(BI->getSuccessor(1))))
ScalarPredicatedBB = true;
if (ScalarPredicatedBB) {
@@ -7363,14 +7440,6 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
return VectorizationFactor::Disabled();
}
-bool LoopVectorizationPlanner::requiresTooManyRuntimeChecks() const {
- unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
- return (NumRuntimePointerChecks >
- VectorizerParams::RuntimeMemoryCheckThreshold &&
- !Hints.allowReordering()) ||
- NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
-}
-
Optional<VectorizationFactor>
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -7439,7 +7508,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
- return CM.selectVectorizationFactor(VFCandidates);
+ VectorizationFactor VF = CM.selectVectorizationFactor(VFCandidates);
+  assert((VF.Width.isScalar() || VF.ScalarCost > 0) &&
+         "when vectorizing, the scalar cost must be non-zero.");
+ return VF;
}
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -7554,7 +7625,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
if (VectorizedLoopID)
- L->setLoopID(VectorizedLoopID.getValue());
+ L->setLoopID(VectorizedLoopID.value());
else {
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
@@ -7585,51 +7656,6 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
-void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
- SmallPtrSetImpl<Instruction *> &DeadInstructions) {
-
- // We create new control-flow for the vectorized loop, so the original exit
- // conditions will be dead after vectorization if it's only used by the
- // terminator
- SmallVector<BasicBlock*> ExitingBlocks;
- OrigLoop->getExitingBlocks(ExitingBlocks);
- for (auto *BB : ExitingBlocks) {
- auto *Cmp = dyn_cast<Instruction>(BB->getTerminator()->getOperand(0));
- if (!Cmp || !Cmp->hasOneUse())
- continue;
-
- // TODO: we should introduce a getUniqueExitingBlocks on Loop
- if (!DeadInstructions.insert(Cmp).second)
- continue;
-
- // The operands of the icmp is often a dead trunc, used by IndUpdate.
- // TODO: can recurse through operands in general
- for (Value *Op : Cmp->operands()) {
- if (isa<TruncInst>(Op) && Op->hasOneUse())
- DeadInstructions.insert(cast<Instruction>(Op));
- }
- }
-
- // We create new "steps" for induction variable updates to which the original
- // induction variables map. An original update instruction will be dead if
- // all its users except the induction variable are dead.
- auto *Latch = OrigLoop->getLoopLatch();
- for (auto &Induction : Legal->getInductionVars()) {
- PHINode *Ind = Induction.first;
- auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
-
- // If the tail is to be folded by masking, the primary induction variable,
- // if exists, isn't dead: it will be used for masking. Don't kill it.
- if (CM.foldTailByMasking() && IndUpdate == Legal->getPrimaryInduction())
- continue;
-
- if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
- return U == Ind || DeadInstructions.count(cast<Instruction>(U));
- }))
- DeadInstructions.insert(IndUpdate);
- }
-}
-
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
//===--------------------------------------------------------------------===//
@@ -8001,11 +8027,19 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
if (!CM.blockNeedsPredicationForAnyReason(BB))
return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
+ assert(CM.foldTailByMasking() && "must fold the tail");
+
+ // If we're using the active lane mask for control flow, then we get the
+ // mask from the active lane mask PHI that is cached in the VPlan.
+ PredicationStyle EmitGetActiveLaneMask = CM.TTI.emitGetActiveLaneMask();
+ if (EmitGetActiveLaneMask == PredicationStyle::DataAndControlFlow)
+ return BlockMaskCache[BB] = Plan->getActiveLaneMaskPhi();
+
// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
// constructing the desired canonical IV in the header block as its first
// non-phi instructions.
- assert(CM.foldTailByMasking() && "must fold the tail");
+
VPBasicBlock *HeaderVPBB =
Plan->getVectorLoopRegion()->getEntryBasicBlock();
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
@@ -8014,9 +8048,10 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
- if (CM.TTI.emitGetActiveLaneMask()) {
+ if (EmitGetActiveLaneMask != PredicationStyle::None) {
VPValue *TC = Plan->getOrCreateTripCount();
- BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC});
+ BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC},
+ nullptr, "active.lane.mask");
} else {
VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
@@ -8409,9 +8444,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
return RegSucc;
}
-VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
- VPRecipeBase *PredRecipe,
- VPlanPtr &Plan) {
+VPRegionBlock *VPRecipeBuilder::createReplicateRegion(
+ Instruction *Instr, VPReplicateRecipe *PredRecipe, VPlanPtr &Plan) {
// Instructions marked for predication are replicated and placed under an
// if-then construct to prevent side-effects.
@@ -8425,7 +8459,7 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
auto *PHIRecipe = Instr->getType()->isVoidTy()
? nullptr
- : new VPPredInstPHIRecipe(Plan->getOrAddVPValue(Instr));
+ : new VPPredInstPHIRecipe(PredRecipe);
if (PHIRecipe) {
Plan->removeVPValueFor(Instr);
Plan->addVPValue(Instr, PHIRecipe);
@@ -8517,19 +8551,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
ElementCount MaxVF) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
- // Collect instructions from the original loop that will become trivially dead
- // in the vectorized loop. We don't need to vectorize these instructions. For
- // example, original induction update instructions can become dead because we
- // separately emit induction "steps" when generating code for the new loop.
- // Similarly, we create a new latch condition when setting up the structure
- // of the new loop, so the old one can become dead.
- SmallPtrSet<Instruction *, 4> DeadInstructions;
- collectTriviallyDeadInstructions(DeadInstructions);
-
// Add assume instructions we need to drop to DeadInstructions, to prevent
// them from being added to the VPlan.
// TODO: We only need to drop assumes in blocks that get flattend. If the
// control flow is preserved, we should keep them.
+ SmallPtrSet<Instruction *, 4> DeadInstructions;
auto &ConditionalAssumes = Legal->getConditionalAssumes();
DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end());
@@ -8565,32 +8591,84 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
}
}
-// Add a VPCanonicalIVPHIRecipe starting at 0 to the header, a
-// CanonicalIVIncrement{NUW} VPInstruction to increment it by VF * UF and a
-// BranchOnCount VPInstruction to the latch.
+// Add the necessary canonical IV and branch recipes required to control the
+// loop.
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
- bool HasNUW) {
+ bool HasNUW,
+ bool UseLaneMaskForLoopControlFlow) {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
auto *StartV = Plan.getOrAddVPValue(StartIdx);
+ // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
Header->insert(CanonicalIVPHI, Header->begin());
+ // Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
+ // IV by VF * UF.
auto *CanonicalIVIncrement =
new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementNUW
: VPInstruction::CanonicalIVIncrement,
- {CanonicalIVPHI}, DL);
+ {CanonicalIVPHI}, DL, "index.next");
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
EB->appendRecipe(CanonicalIVIncrement);
- auto *BranchOnCount =
- new VPInstruction(VPInstruction::BranchOnCount,
- {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
- EB->appendRecipe(BranchOnCount);
+ if (UseLaneMaskForLoopControlFlow) {
+ // Create the active lane mask instruction in the vplan preheader.
+ VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
+
+ // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
+ // we have to take unrolling into account. Each part needs to start at
+ // Part * VF
+ auto *CanonicalIVIncrementParts =
+ new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
+ : VPInstruction::CanonicalIVIncrementForPart,
+ {StartV}, DL, "index.part.next");
+ Preheader->appendRecipe(CanonicalIVIncrementParts);
+
+ // Create the ActiveLaneMask instruction using the correct start values.
+ VPValue *TC = Plan.getOrCreateTripCount();
+ auto *EntryALM = new VPInstruction(VPInstruction::ActiveLaneMask,
+ {CanonicalIVIncrementParts, TC}, DL,
+ "active.lane.mask.entry");
+ Preheader->appendRecipe(EntryALM);
+
+ // Now create the ActiveLaneMaskPhi recipe in the main loop using the
+ // preheader ActiveLaneMask instruction.
+ auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
+ Header->insert(LaneMaskPhi, Header->getFirstNonPhi());
+
+ // Create the active lane mask for the next iteration of the loop.
+ CanonicalIVIncrementParts =
+ new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
+ : VPInstruction::CanonicalIVIncrementForPart,
+ {CanonicalIVIncrement}, DL);
+ EB->appendRecipe(CanonicalIVIncrementParts);
+
+ auto *ALM = new VPInstruction(VPInstruction::ActiveLaneMask,
+ {CanonicalIVIncrementParts, TC}, DL,
+ "active.lane.mask.next");
+ EB->appendRecipe(ALM);
+ LaneMaskPhi->addOperand(ALM);
+
+ // We have to invert the mask here because a true condition means jumping
+ // to the exit block.
+ auto *NotMask = new VPInstruction(VPInstruction::Not, ALM, DL);
+ EB->appendRecipe(NotMask);
+
+ VPInstruction *BranchBack =
+ new VPInstruction(VPInstruction::BranchOnCond, {NotMask}, DL);
+ EB->appendRecipe(BranchBack);
+ } else {
+ // Add the BranchOnCount VPInstruction to the latch.
+ VPInstruction *BranchBack = new VPInstruction(
+ VPInstruction::BranchOnCount,
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
+ EB->appendRecipe(BranchBack);
+ }
}
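For readers unfamiliar with the data-and-control-flow predication scheme, here is a rough standalone C++ model of the loop structure the UseLaneMaskForLoopControlFlow branch above builds (not LLVM code; VF and the trip count are invented values, and UF = 1 for brevity): the entry mask is computed in the preheader, the vector body is predicated on the current mask, the latch computes the mask for index + VF, and the loop exits once no lane remains active.

#include <cstdio>

static const int VF = 4;

// Models get.active.lane.mask(Base, TC): lane I is active iff Base + I < TC.
static void activeLaneMask(long Base, long TC, bool Mask[VF]) {
  for (int I = 0; I < VF; ++I)
    Mask[I] = Base + I < TC;
}

static bool anyActive(const bool Mask[VF]) {
  for (int I = 0; I < VF; ++I)
    if (Mask[I])
      return true;
  return false;
}

int main() {
  const long TC = 10; // hypothetical trip count
  bool Mask[VF];
  activeLaneMask(0, TC, Mask); // preheader: "active.lane.mask.entry"
  for (long Index = 0; anyActive(Mask); Index += VF) {
    for (int I = 0; I < VF; ++I)
      if (Mask[I]) // the vector body is predicated on the mask
        printf("process element %ld\n", Index + I);
    activeLaneMask(Index + VF, TC, Mask); // latch: "active.lane.mask.next"
  }
  return 0;
}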
// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the
@@ -8691,7 +8769,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(),
DLInst ? DLInst->getDebugLoc() : DebugLoc(),
- !CM.foldTailByMasking());
+ !CM.foldTailByMasking(),
+ CM.useActiveLaneMaskForControlFlow());
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
@@ -8961,8 +9040,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
VPlanTransforms::sinkScalarOperands(*Plan);
- VPlanTransforms::mergeReplicateRegions(*Plan);
VPlanTransforms::removeDeadRecipes(*Plan);
+ VPlanTransforms::mergeReplicateRegions(*Plan);
VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan);
// Fold Exit block into its predecessor if possible.
@@ -9006,7 +9085,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
Term->eraseFromParent();
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
- true);
+ true, CM.useActiveLaneMaskForControlFlow());
return Plan;
}
@@ -9078,7 +9157,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
Plan->removeVPValueFor(R);
Plan->addVPValue(R, RedRecipe);
- WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator());
+ // Append the recipe to the end of the VPBasicBlock because we need to
+ // ensure that it comes after all of its inputs, including CondOp.
+ WidenRecipe->getParent()->appendRecipe(RedRecipe);
WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
WidenRecipe->eraseFromParent();
@@ -9151,229 +9232,6 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
*this, State);
}
-void VPWidenSelectRecipe::execute(VPTransformState &State) {
- auto &I = *cast<SelectInst>(getUnderlyingInstr());
- State.setDebugLocFromInst(&I);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- auto *InvarCond =
- InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
- Value *Op0 = State.get(getOperand(1), Part);
- Value *Op1 = State.get(getOperand(2), Part);
- Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
- State.set(this, Sel, Part);
- State.addMetadata(Sel, &I);
- }
-}
-
-void VPWidenRecipe::execute(VPTransformState &State) {
- auto &I = *cast<Instruction>(getUnderlyingValue());
- auto &Builder = State.Builder;
- switch (I.getOpcode()) {
- case Instruction::Call:
- case Instruction::Br:
- case Instruction::PHI:
- case Instruction::GetElementPtr:
- case Instruction::Select:
- llvm_unreachable("This instruction is handled by a different recipe.");
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::URem:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::FNeg:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // Just widen unops and binops.
- State.setDebugLocFromInst(&I);
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- SmallVector<Value *, 2> Ops;
- for (VPValue *VPOp : operands())
- Ops.push_back(State.get(VPOp, Part));
-
- Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
-
- if (auto *VecOp = dyn_cast<Instruction>(V)) {
- VecOp->copyIRFlags(&I);
-
- // If the instruction is vectorized and was in a basic block that needed
- // predication, we can't propagate poison-generating flags (nuw/nsw,
- // exact, etc.). The control flow has been linearized and the
- // instruction is no longer guarded by the predicate, which could make
- // the flag properties to no longer hold.
- if (State.MayGeneratePoisonRecipes.contains(this))
- VecOp->dropPoisonGeneratingFlags();
- }
-
- // Use this vector value for all users of the original instruction.
- State.set(this, V, Part);
- State.addMetadata(V, &I);
- }
-
- break;
- }
- case Instruction::Freeze: {
- State.setDebugLocFromInst(&I);
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Op = State.get(getOperand(0), Part);
-
- Value *Freeze = Builder.CreateFreeze(Op);
- State.set(this, Freeze, Part);
- }
- break;
- }
- case Instruction::ICmp:
- case Instruction::FCmp: {
- // Widen compares. Generate vector compares.
- bool FCmp = (I.getOpcode() == Instruction::FCmp);
- auto *Cmp = cast<CmpInst>(&I);
- State.setDebugLocFromInst(Cmp);
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *A = State.get(getOperand(0), Part);
- Value *B = State.get(getOperand(1), Part);
- Value *C = nullptr;
- if (FCmp) {
- // Propagate fast math flags.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(Cmp->getFastMathFlags());
- C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
- } else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
- }
- State.set(this, C, Part);
- State.addMetadata(C, &I);
- }
-
- break;
- }
-
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- auto *CI = cast<CastInst>(&I);
- State.setDebugLocFromInst(CI);
-
- /// Vectorize casts.
- Type *DestTy = (State.VF.isScalar())
- ? CI->getType()
- : VectorType::get(CI->getType(), State.VF);
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *A = State.get(getOperand(0), Part);
- Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- State.set(this, Cast, Part);
- State.addMetadata(Cast, &I);
- }
- break;
- }
- default:
- // This instruction is not vectorized by simple widening.
- LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
- llvm_unreachable("Unhandled instruction!");
- } // end of switch.
-}
-
-void VPWidenGEPRecipe::execute(VPTransformState &State) {
- auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
- // Construct a vector GEP by widening the operands of the scalar GEP as
- // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
- // results in a vector of pointers when at least one operand of the GEP
- // is vector-typed. Thus, to keep the representation compact, we only use
- // vector-typed operands for loop-varying values.
-
- if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
- // If we are vectorizing, but the GEP has only loop-invariant operands,
- // the GEP we build (by only using vector-typed operands for
- // loop-varying values) would be a scalar pointer. Thus, to ensure we
- // produce a vector of pointers, we need to either arbitrarily pick an
- // operand to broadcast, or broadcast a clone of the original GEP.
- // Here, we broadcast a clone of the original.
- //
- // TODO: If at some point we decide to scalarize instructions having
- // loop-invariant operands, this special case will no longer be
- // required. We would add the scalarization decision to
- // collectLoopScalars() and teach getVectorValue() to broadcast
- // the lane-zero scalar value.
- auto *Clone = State.Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
- State.set(this, EntryPart, Part);
- State.addMetadata(EntryPart, GEP);
- }
- } else {
- // If the GEP has at least one loop-varying operand, we are sure to
- // produce a vector of pointers. But if we are only unrolling, we want
- // to produce a scalar GEP for each unroll part. Thus, the GEP we
- // produce with the code below will be scalar (if VF == 1) or vector
- // (otherwise). Note that for the unroll-only case, we still maintain
- // values in the vector mapping with initVector, as we do for other
- // instructions.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- // The pointer operand of the new GEP. If it's loop-invariant, we
- // won't broadcast it.
- auto *Ptr = IsPtrLoopInvariant
- ? State.get(getOperand(0), VPIteration(0, 0))
- : State.get(getOperand(0), Part);
-
- // Collect all the indices for the new GEP. If any index is
- // loop-invariant, we won't broadcast it.
- SmallVector<Value *, 4> Indices;
- for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
- VPValue *Operand = getOperand(I);
- if (IsIndexLoopInvariant[I - 1])
- Indices.push_back(State.get(Operand, VPIteration(0, 0)));
- else
- Indices.push_back(State.get(Operand, Part));
- }
-
- // If the GEP instruction is vectorized and was in a basic block that
- // needed predication, we can't propagate the poison-generating 'inbounds'
- // flag. The control flow has been linearized and the GEP is no longer
- // guarded by the predicate, which could make the 'inbounds' properties to
- // no longer hold.
- bool IsInBounds =
- GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
-
- // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
- // but it should be a vector, otherwise.
- auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
- Indices, "", IsInBounds);
- assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
- "NewGEP is not a pointer vector");
- State.set(this, NewGEP, Part);
- State.addMetadata(NewGEP, GEP);
- }
- }
-}
-
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Int or FP induction being replicated.");
@@ -9632,45 +9490,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
}
}
-void VPBlendRecipe::execute(VPTransformState &State) {
- State.setDebugLocFromInst(Phi);
- // We know that all PHIs in non-header blocks are converted into
- // selects, so we don't have to worry about the insertion order and we
- // can just use the builder.
- // At this point we generate the predication tree. There may be
- // duplications since this is a simple recursive scan, but future
- // optimizations will clean it up.
-
- unsigned NumIncoming = getNumIncomingValues();
-
- // Generate a sequence of selects of the form:
- // SELECT(Mask3, In3,
- // SELECT(Mask2, In2,
- // SELECT(Mask1, In1,
- // In0)))
- // Note that Mask0 is never used: lanes for which no path reaches this phi and
- // are essentially undef are taken from In0.
- InnerLoopVectorizer::VectorParts Entry(State.UF);
- for (unsigned In = 0; In < NumIncoming; ++In) {
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- // We might have single edge PHIs (blocks) - use an identity
- // 'select' for the first PHI operand.
- Value *In0 = State.get(getIncomingValue(In), Part);
- if (In == 0)
- Entry[Part] = In0; // Initialize with the first incoming value.
- else {
- // Select between the current value and the previous incoming edge
- // based on the incoming mask.
- Value *Cond = State.get(getMask(In), Part);
- Entry[Part] =
- State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
- }
- }
- }
- for (unsigned Part = 0; Part < State.UF; ++Part)
- State.set(this, Entry[Part], Part);
-}
-
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),
@@ -9758,32 +9577,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
State);
}
-void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
- assert(State.Instance && "Branch on Mask works only on single instance.");
-
- unsigned Part = State.Instance->Part;
- unsigned Lane = State.Instance->Lane.getKnownLane();
-
- Value *ConditionBit = nullptr;
- VPValue *BlockInMask = getMask();
- if (BlockInMask) {
- ConditionBit = State.get(BlockInMask, Part);
- if (ConditionBit->getType()->isVectorTy())
- ConditionBit = State.Builder.CreateExtractElement(
- ConditionBit, State.Builder.getInt32(Lane));
- } else // Block in mask is all-one.
- ConditionBit = State.Builder.getTrue();
-
- // Replace the temporary unreachable terminator with a new conditional branch,
- // whose two destinations will be set later when they are created.
- auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
- assert(isa<UnreachableInst>(CurrentTerminator) &&
- "Expected to replace unreachable terminator with conditional branch.");
- auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
- CondBr->setSuccessor(0, nullptr);
- ReplaceInstWithInst(CurrentTerminator, CondBr);
-}
-
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Predicated instruction PHI works per instance.");
Instruction *ScalarPredInst =
@@ -10103,8 +9896,7 @@ static bool processLoopInVPlanNativePath(
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints,
- Requirements, ORE);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, ORE);
// Get user vectorization factor.
ElementCount UserVF = Hints.getWidth();
@@ -10123,10 +9915,10 @@ static bool processLoopInVPlanNativePath(
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
{
- GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getParent()->getDataLayout());
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
- &CM, BFI, PSI, Checks);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
+ VF.Width, 1, LVL, &CM, BFI, PSI, Checks);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
@@ -10183,6 +9975,105 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
}
}
+static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
+ VectorizationFactor &VF,
+ Optional<unsigned> VScale, Loop *L,
+ ScalarEvolution &SE) {
+ InstructionCost CheckCost = Checks.getCost();
+ if (!CheckCost.isValid())
+ return false;
+
+ // When only interleaving (i.e. the VF is scalar), the scalar and vector
+ // costs will be equal, which would lead to a divide by zero below. Fall
+ // back to the hard threshold.
+ if (VF.Width.isScalar()) {
+ if (CheckCost > VectorizeMemoryCheckThreshold) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Interleaving only is not profitable due to runtime checks\n");
+ return false;
+ }
+ return true;
+ }
+
+ // The scalar cost should only be 0 when vectorizing with a user-specified
+ // VF/IC. In those cases, runtime checks should always be generated.
+ double ScalarC = *VF.ScalarCost.getValue();
+ if (ScalarC == 0)
+ return true;
+
+ // First, compute the minimum iteration count required so that the vector
+ // loop outperforms the scalar loop.
+ // The total cost of the scalar loop is
+ // ScalarC * TC
+ // where
+ // * TC is the actual trip count of the loop.
+ // * ScalarC is the cost of a single scalar iteration.
+ //
+ // The total cost of the vector loop is
+ // RtC + VecC * (TC / VF) + EpiC
+ // where
+ // * RtC is the cost of the generated runtime checks
+ // * VecC is the cost of a single vector iteration.
+ // * TC is the actual trip count of the loop
+ // * VF is the vectorization factor
+ //  * EpiC is the cost of the generated epilogue, including the cost
+ // of the remaining scalar operations.
+ //
+ // Vectorization is profitable once the total vector cost is less than the
+ // total scalar cost:
+ // RtC + VecC * (TC / VF) + EpiC < ScalarC * TC
+ //
+ // Now we can compute the minimum required trip count TC as
+ // (RtC + EpiC) / (ScalarC - (VecC / VF)) < TC
+ //
+ // For now we assume the epilogue cost EpiC = 0 for simplicity. Note that
+ // the computations are performed on doubles, not integers, and that the
+ // result is rounded up; hence we get an upper estimate of the TC.
+ unsigned IntVF = VF.Width.getKnownMinValue();
+ if (VF.Width.isScalable()) {
+ unsigned AssumedMinimumVscale = 1;
+ if (VScale)
+ AssumedMinimumVscale = *VScale;
+ IntVF *= AssumedMinimumVscale;
+ }
+ double VecCOverVF = double(*VF.Cost.getValue()) / IntVF;
+ double RtC = *CheckCost.getValue();
+ double MinTC1 = RtC / (ScalarC - VecCOverVF);
+
+ // Second, compute a minimum iteration count so that the cost of the
+ // runtime checks is only a fraction of the total scalar loop cost. This
+ // adds a loop-dependent bound on the overhead incurred if the runtime
+ // checks fail. In case the runtime checks fail, the cost is RtC + ScalarC
+ // * TC. To bound the runtime check to be a fraction 1/X of the scalar
+ // cost, compute
+ // RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC
+ double MinTC2 = RtC * 10 / ScalarC;
+
+ // Now pick the larger of the two minimums and, if it is not already a
+ // multiple of VF, round it up to the next multiple of VF. This should
+ // partly compensate for ignoring the epilogue cost.
+ uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2));
+ VF.MinProfitableTripCount = ElementCount::getFixed(alignTo(MinTC, IntVF));
+
+ LLVM_DEBUG(
+ dbgs() << "LV: Minimum required TC for runtime checks to be profitable:"
+ << VF.MinProfitableTripCount << "\n");
+
+ // Skip vectorization if the expected trip count is less than the minimum
+ // required trip count.
+ if (auto ExpectedTC = getSmallBestKnownTC(SE, L)) {
+ if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC),
+ VF.MinProfitableTripCount)) {
+ LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected "
+ "trip count < minimum profitable VF ("
+ << *ExpectedTC << " < " << VF.MinProfitableTripCount
+ << ")\n");
+
+ return false;
+ }
+ }
+ return true;
+}
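To make the arithmetic in areRuntimeChecksProfitable concrete, here is a minimal standalone sketch of the same computation with invented costs; ScalarC, VecC, RtC and the VF below are hypothetical numbers, not values the LLVM cost model would produce.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const double ScalarC = 8.0; // cost of one scalar iteration
  const double VecC = 20.0;   // cost of one vector iteration
  const double RtC = 60.0;    // cost of the runtime checks
  const unsigned IntVF = 4;   // vectorization factor

  // Bound 1: TC where RtC + VecC * (TC / VF) beats ScalarC * TC (EpiC = 0).
  double MinTC1 = RtC / (ScalarC - VecC / IntVF);
  // Bound 2: cap the check cost at a tenth of the scalar loop cost.
  double MinTC2 = RtC * 10 / ScalarC;

  // Take the larger bound and round it up to the next multiple of VF.
  uint64_t MinTC = (uint64_t)std::ceil(std::max(MinTC1, MinTC2));
  uint64_t Rounded = (MinTC + IntVF - 1) / IntVF * IntVF;
  printf("MinTC1 = %.1f, MinTC2 = %.1f, minimum profitable TC = %llu\n",
         MinTC1, MinTC2, (unsigned long long)Rounded);
  // Prints: MinTC1 = 20.0, MinTC2 = 75.0, minimum profitable TC = 76
  return 0;
}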
+
LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
: InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced ||
!EnableLoopInterleaving),
@@ -10340,8 +10231,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
CM.collectElementTypesForWidening();
// Use the planner for vectorization.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints,
- Requirements, ORE);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, ORE);
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
@@ -10353,10 +10243,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;
- GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getParent()->getDataLayout());
if (MaybeVF) {
- if (LVP.requiresTooManyRuntimeChecks()) {
+ VF = *MaybeVF;
+ // Select the interleave count.
+ IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+
+ unsigned SelectedIC = std::max(IC, UserIC);
+ // Optimistically generate runtime checks if they are needed. Drop them if
+ // they turn out to not be profitable.
+ if (VF.Width.isVector() || SelectedIC > 1)
+ Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+
+ // Check if it is profitable to vectorize with runtime checks.
+ bool ForceVectorization =
+ Hints.getForce() == LoopVectorizeHints::FK_Enabled;
+ if (!ForceVectorization &&
+ !areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L,
+ *PSE.getSE())) {
ORE->emit([&]() {
return OptimizationRemarkAnalysisAliasing(
DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),
@@ -10368,15 +10273,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Hints.emitRemarkWithHints();
return false;
}
- VF = *MaybeVF;
- // Select the interleave count.
- IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
-
- unsigned SelectedIC = std::max(IC, UserIC);
- // Optimistically generate runtime checks if they are needed. Drop them if
- // they turn out to not be profitable.
- if (VF.Width.isVector() || SelectedIC > 1)
- Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
}
// Identify the diagnostic messages that should be produced.
@@ -10533,8 +10429,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
- &LVL, &CM, BFI, PSI, Checks);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
+ VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
+ PSI, Checks);
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
@@ -10564,7 +10461,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
if (RemainderLoopID) {
- L->setLoopID(RemainderLoopID.getValue());
+ L->setLoopID(RemainderLoopID.value());
} else {
if (DisableRuntimeUnroll)
AddRuntimeUnrollDisableMetaData(L);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 019a09665a67..e136cd9aedac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2637,7 +2637,7 @@ private:
AliasCacheKey key = std::make_pair(Inst1, Inst2);
Optional<bool> &result = AliasCache[key];
if (result) {
- return result.getValue();
+ return result.value();
}
bool aliased = true;
if (Loc1.Ptr && isSimple(Inst1))
@@ -4592,7 +4592,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
};
InstructionsState S = getSameOpcode(VL);
- if (Depth == RecursionMaxDepth) {
+
+ // Gather if we hit the RecursionMaxDepth, unless this is a load (or a
+ // zext/sext of a load), in which case we peek through and include it in the
+ // tree, without letting the tree grow over budget.
+ if (Depth >= RecursionMaxDepth &&
+ !(S.MainOp && isa<Instruction>(S.MainOp) && S.MainOp == S.AltOp &&
+ VL.size() >= 4 &&
+ (match(S.MainOp, m_Load(m_Value())) || all_of(VL, [&S](const Value *I) {
+ return match(I,
+ m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
+ cast<Instruction>(I)->getOpcode() ==
+ cast<Instruction>(S.MainOp)->getOpcode();
+ })))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
@@ -11217,7 +11229,7 @@ public:
return OptimizationRemarkMissed(
SV_NAME, "HorSLPNotBeneficial",
ReducedValsToOps.find(VL[0])->second.front())
- << "Vectorizing horizontal reduction is possible"
+ << "Vectorizing horizontal reduction is possible "
<< "but not beneficial with cost " << ore::NV("Cost", Cost)
<< " and threshold "
<< ore::NV("Threshold", -SLPCostThreshold);
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 97f2b1a93815..c7949c42c03e 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -159,7 +159,8 @@ public:
/// Create a replicating region for instruction \p I that requires
/// predication. \p PredRecipe is a VPReplicateRecipe holding \p I.
- VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe,
+ VPRegionBlock *createReplicateRegion(Instruction *I,
+ VPReplicateRecipe *PredRecipe,
VPlanPtr &Plan);
/// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4d709097c306..30032dda7f60 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -248,25 +248,27 @@ void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
}
void VPTransformState::setDebugLocFromInst(const Value *V) {
- if (const Instruction *Inst = dyn_cast_or_null<Instruction>(V)) {
- const DILocation *DIL = Inst->getDebugLoc();
+ const Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst) {
+ Builder.SetCurrentDebugLocation(DebugLoc());
+ return;
+ }
- // When a FSDiscriminator is enabled, we don't need to add the multiply
- // factors to the discriminators.
- if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
- !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
- // FIXME: For scalable vectors, assume vscale=1.
- auto NewDIL =
- DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
- if (NewDIL)
- Builder.SetCurrentDebugLocation(*NewDIL);
- else
- LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
- << DIL->getFilename() << " Line: " << DIL->getLine());
- } else
- Builder.SetCurrentDebugLocation(DIL);
+ const DILocation *DIL = Inst->getDebugLoc();
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
+ !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
+ // FIXME: For scalable vectors, assume vscale=1.
+ auto NewDIL =
+ DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
+ if (NewDIL)
+ Builder.SetCurrentDebugLocation(*NewDIL);
+ else
+ LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
} else
- Builder.SetCurrentDebugLocation(DebugLoc());
+ Builder.SetCurrentDebugLocation(DIL);
}
BasicBlock *
@@ -566,6 +568,24 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
+ VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
+ for (VPRecipeBase &R : Header->phis()) {
+ if (isa<VPActiveLaneMaskPHIRecipe>(&R))
+ return cast<VPActiveLaneMaskPHIRecipe>(&R);
+ }
+ return nullptr;
+}
+
+static bool canSimplifyBranchOnCond(VPInstruction *Term) {
+ VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0));
+ if (!Not || Not->getOpcode() != VPInstruction::Not)
+ return false;
+
+ VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0));
+ return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask;
+}
+
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
Value *CanonicalIVStartValue,
VPTransformState &State,
@@ -573,11 +593,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
- // Try to simplify BranchOnCount to 'BranchOnCond true' if TC <= VF * UF when
- // preparing to execute the plan for the main vector loop.
- if (!IsEpilogueVectorization && Term &&
- Term->getOpcode() == VPInstruction::BranchOnCount &&
- isa<ConstantInt>(TripCountV)) {
+ // Try to simplify the branch condition if TC <= VF * UF when preparing to
+ // execute the plan for the main vector loop. We only do this if the
+ // terminator is:
+ // 1. BranchOnCount, or
+ // 2. BranchOnCond where the input is Not(ActiveLaneMask).
+ if (!IsEpilogueVectorization && Term && isa<ConstantInt>(TripCountV) &&
+ (Term->getOpcode() == VPInstruction::BranchOnCount ||
+ (Term->getOpcode() == VPInstruction::BranchOnCond &&
+ canSimplifyBranchOnCond(Term)))) {
ConstantInt *C = cast<ConstantInt>(TripCountV);
uint64_t TCVal = C->getZExtValue();
if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
@@ -697,7 +721,8 @@ void VPlan::execute(VPTransformState *State) {
// generated.
bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
- cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
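The canSimplifyBranchOnCond helper added above is a pure shape test on the terminator's operand chain. A toy standalone model of the pattern it accepts, using an invented Node type in place of real VPInstructions:

#include <cassert>
#include <vector>

// Invented stand-ins for the VPInstruction opcodes involved.
enum Opcode { ActiveLaneMask, Not, BranchOnCond };

struct Node {
  Opcode Op;
  std::vector<Node *> Operands;
};

// Accept only BranchOnCond(Not(ActiveLaneMask(...))).
static bool isBranchOnNotActiveLaneMask(const Node *Term) {
  if (Term->Op != BranchOnCond || Term->Operands.empty())
    return false;
  const Node *N = Term->Operands[0];
  if (N->Op != Not || N->Operands.empty())
    return false;
  return N->Operands[0]->Op == ActiveLaneMask;
}

int main() {
  Node ALM{ActiveLaneMask, {}};
  Node NotALM{Not, {&ALM}};
  Node Good{BranchOnCond, {&NotALM}};
  Node Bad{BranchOnCond, {&ALM}}; // no Not in between: not simplifiable
  assert(isBranchOnNotActiveLaneMask(&Good));
  assert(!isBranchOnNotActiveLaneMask(&Bad));
  return 0;
}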
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 09da4a545d0d..f009a7ee6b4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -784,6 +784,10 @@ public:
ActiveLaneMask,
CanonicalIVIncrement,
CanonicalIVIncrementNUW,
+ // The next two are similar to the above, but instead increment the
+ // canonical IV separately for each unrolled part.
+ CanonicalIVIncrementForPart,
+ CanonicalIVIncrementForPartNUW,
BranchOnCount,
BranchOnCond
};
@@ -794,6 +798,9 @@ private:
FastMathFlags FMF;
DebugLoc DL;
+ /// An optional name that can be used for the generated IR instruction.
+ const std::string Name;
+
/// Utility method serving execute(): generates a single instance of the
/// modeled instruction.
void generateInstruction(VPTransformState &State, unsigned Part);
@@ -802,14 +809,15 @@ protected:
void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
public:
- VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL)
+ VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
+ const Twine &Name = "")
: VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode),
- DL(DL) {}
+ DL(DL), Name(Name.str()) {}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
- DebugLoc DL = {})
- : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL) {}
+ DebugLoc DL = {}, const Twine &Name = "")
+ : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPValue *V) {
@@ -818,7 +826,7 @@ public:
VPInstruction *clone() const {
SmallVector<VPValue *, 2> Operands(operands());
- return new VPInstruction(Opcode, Operands, DL);
+ return new VPInstruction(Opcode, Operands, DL, Name);
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -897,6 +905,8 @@ public:
case VPInstruction::ActiveLaneMask:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementNUW:
+ case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::CanonicalIVIncrementForPartNUW:
case VPInstruction::BranchOnCount:
return true;
};
@@ -1125,6 +1135,7 @@ public:
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
return B->getVPDefID() == VPRecipeBase::VPCanonicalIVPHISC ||
+ B->getVPDefID() == VPRecipeBase::VPActiveLaneMaskPHISC ||
B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC ||
B->getVPDefID() == VPRecipeBase::VPReductionPHISC ||
B->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
@@ -1132,6 +1143,7 @@ public:
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC ||
+ V->getVPValueID() == VPValue::VPVActiveLaneMaskPHISC ||
V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC ||
V->getVPValueID() == VPValue::VPVReductionPHISC ||
V->getVPValueID() == VPValue::VPVWidenIntOrFpInductionSC ||
@@ -1861,6 +1873,42 @@ public:
}
};
+/// A recipe for generating the active lane mask for the vector loop that is
+/// used to predicate the vector operations.
+/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
+/// remove VPActiveLaneMaskPHIRecipe.
+class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
+ DebugLoc DL;
+
+public:
+ VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
+ : VPHeaderPHIRecipe(VPValue::VPVActiveLaneMaskPHISC,
+ VPActiveLaneMaskPHISC, nullptr, StartMask),
+ DL(DL) {}
+
+ ~VPActiveLaneMaskPHIRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPActiveLaneMaskPHISC;
+ }
+ static inline bool classof(const VPHeaderPHIRecipe *D) {
+ return D->getVPDefID() == VPActiveLaneMaskPHISC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVActiveLaneMaskPHISC;
+ }
+
+ /// Generate the active lane mask phi of the vector loop.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
public:
@@ -2656,6 +2704,10 @@ public:
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
+ /// Find and return the VPActiveLaneMaskPHIRecipe from the header; there
+ /// should be at most one. If there isn't one, return nullptr.
+ VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi();
+
void addLiveOut(PHINode *PN, VPValue *V);
void clearLiveOuts() {
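One detail of the VPInstruction changes above worth spelling out: the constructor takes a Twine but stores Name(Name.str()), because a Twine may reference temporaries that only live until the end of the enclosing expression and must not be kept around. A small sketch of the idiom, assuming LLVM's ADT headers are available; RecipeSketch is an invented name, not a class from this patch:

#include "llvm/ADT/Twine.h"
#include <string>

struct RecipeSketch {
  const std::string Name; // owned copy; safe to keep after construction
  RecipeSketch(const llvm::Twine &N) : Name(N.str()) {}
};

int main() {
  // The concatenated Twine is a temporary; copying via str() is safe.
  RecipeSketch R(llvm::Twine("active.lane.mask.") + "entry");
  return R.Name == "active.lane.mask.entry" ? 0 : 1;
}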
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 92422b17457c..fdd901a4a70d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -26,13 +26,19 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
using namespace llvm;
+using VectorParts = SmallVector<Value *, 2>;
+
extern cl::opt<bool> EnableVPlanNativePath;
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
case VPWidenMemoryInstructionSC: {
@@ -186,7 +192,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
if (Instruction::isBinaryOp(getOpcode())) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
- Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
+ Value *V =
+ Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
State.set(this, V, Part);
return;
}
@@ -194,14 +201,14 @@ void VPInstruction::generateInstruction(VPTransformState &State,
switch (getOpcode()) {
case VPInstruction::Not: {
Value *A = State.get(getOperand(0), Part);
- Value *V = Builder.CreateNot(A);
+ Value *V = Builder.CreateNot(A, Name);
State.set(this, V, Part);
break;
}
case VPInstruction::ICmpULE: {
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
- Value *V = Builder.CreateICmpULE(IV, TC);
+ Value *V = Builder.CreateICmpULE(IV, TC, Name);
State.set(this, V, Part);
break;
}
@@ -209,7 +216,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
Value *Cond = State.get(getOperand(0), Part);
Value *Op1 = State.get(getOperand(1), Part);
Value *Op2 = State.get(getOperand(2), Part);
- Value *V = Builder.CreateSelect(Cond, Op1, Op2);
+ Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
State.set(this, V, Part);
break;
}
@@ -223,7 +230,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
auto *PredTy = VectorType::get(Int1Ty, State.VF);
Instruction *Call = Builder.CreateIntrinsic(
Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
- {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
+ {VIVElem0, ScalarTC}, nullptr, Name);
State.set(this, Call, Part);
break;
}
@@ -247,7 +254,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, PartMinus1, Part);
} else {
Value *V2 = State.get(getOperand(1), Part);
- State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
+ State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
+ Part);
}
break;
}
@@ -261,7 +269,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
// elements) times the unroll factor (num of SIMD instructions).
Value *Step =
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
- Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
+ Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
} else {
Next = State.get(this, 0);
}
@@ -269,6 +277,23 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, Next, Part);
break;
}
+
+ case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::CanonicalIVIncrementForPartNUW: {
+ bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
+ auto *IV = State.get(getOperand(0), VPIteration(0, 0));
+ if (Part == 0) {
+ State.set(this, IV, Part);
+ break;
+ }
+
+ // The canonical IV is incremented by the vectorization factor (num of SIMD
+ // elements) times the unroll part.
+ Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
+ Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
+ State.set(this, Next, Part);
+ break;
+ }
case VPInstruction::BranchOnCond: {
if (Part != 0)
break;
@@ -370,6 +395,12 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
+ case VPInstruction::CanonicalIVIncrementForPart:
+ O << "VF * Part + ";
+ break;
+ case VPInstruction::CanonicalIVIncrementForPartNUW:
+ O << "VF * Part +(nuw) ";
+ break;
case VPInstruction::BranchOnCount:
O << "branch-on-count ";
break;
@@ -431,7 +462,158 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
getOperand(2)->printAsOperand(O, SlotTracker);
O << (InvariantCond ? " (condition is loop invariant)" : "");
}
+#endif
+
+void VPWidenSelectRecipe::execute(VPTransformState &State) {
+ auto &I = *cast<SelectInst>(getUnderlyingInstr());
+ State.setDebugLocFromInst(&I);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ auto *InvarCond =
+ InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
+ Value *Op0 = State.get(getOperand(1), Part);
+ Value *Op1 = State.get(getOperand(2), Part);
+ Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
+ State.set(this, Sel, Part);
+ State.addMetadata(Sel, &I);
+ }
+}
+
+void VPWidenRecipe::execute(VPTransformState &State) {
+ auto &I = *cast<Instruction>(getUnderlyingValue());
+ auto &Builder = State.Builder;
+ switch (I.getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Br:
+ case Instruction::PHI:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ llvm_unreachable("This instruction is handled by a different recipe.");
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::FNeg:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen unops and binops.
+ State.setDebugLocFromInst(&I);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ SmallVector<Value *, 2> Ops;
+ for (VPValue *VPOp : operands())
+ Ops.push_back(State.get(VPOp, Part));
+
+ Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+
+ if (auto *VecOp = dyn_cast<Instruction>(V)) {
+ VecOp->copyIRFlags(&I);
+ // If the instruction is vectorized and was in a basic block that needed
+ // predication, we can't propagate poison-generating flags (nuw/nsw,
+ // exact, etc.). The control flow has been linearized and the
+ // instruction is no longer guarded by the predicate, which could make
+ // the flag properties no longer hold.
+ if (State.MayGeneratePoisonRecipes.contains(this))
+ VecOp->dropPoisonGeneratingFlags();
+ }
+
+ // Use this vector value for all users of the original instruction.
+ State.set(this, V, Part);
+ State.addMetadata(V, &I);
+ }
+
+ break;
+ }
+ case Instruction::Freeze: {
+ State.setDebugLocFromInst(&I);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Op = State.get(getOperand(0), Part);
+
+ Value *Freeze = Builder.CreateFreeze(Op);
+ State.set(this, Freeze, Part);
+ }
+ break;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (I.getOpcode() == Instruction::FCmp);
+ auto *Cmp = cast<CmpInst>(&I);
+ State.setDebugLocFromInst(Cmp);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ Value *C = nullptr;
+ if (FCmp) {
+ // Propagate fast math flags.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(Cmp->getFastMathFlags());
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ } else {
+ C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ }
+ State.set(this, C, Part);
+ State.addMetadata(C, &I);
+ }
+
+ break;
+ }
+
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ auto *CI = cast<CastInst>(&I);
+ State.setDebugLocFromInst(CI);
+
+ // Vectorize casts.
+ Type *DestTy = (State.VF.isScalar())
+ ? CI->getType()
+ : VectorType::get(CI->getType(), State.VF);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ State.set(this, Cast, Part);
+ State.addMetadata(Cast, &I);
+ }
+ break;
+ }
+ default:
+ // This instruction is not vectorized by simple widening.
+ LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
+ llvm_unreachable("Unhandled instruction!");
+ } // end of switch.
+}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
@@ -487,7 +669,82 @@ void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent << "= SCALAR-STEPS ";
printOperands(O, SlotTracker);
}
+#endif
+
+void VPWidenGEPRecipe::execute(VPTransformState &State) {
+ auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
+ // Construct a vector GEP by widening the operands of the scalar GEP as
+ // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
+ // results in a vector of pointers when at least one operand of the GEP
+ // is vector-typed. Thus, to keep the representation compact, we only use
+ // vector-typed operands for loop-varying values.
+
+ if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+ // If we are vectorizing, but the GEP has only loop-invariant operands,
+ // the GEP we build (by only using vector-typed operands for
+ // loop-varying values) would be a scalar pointer. Thus, to ensure we
+ // produce a vector of pointers, we need to either arbitrarily pick an
+ // operand to broadcast, or broadcast a clone of the original GEP.
+ // Here, we broadcast a clone of the original.
+ //
+ // TODO: If at some point we decide to scalarize instructions having
+ // loop-invariant operands, this special case will no longer be
+ // required. We would add the scalarization decision to
+ // collectLoopScalars() and teach getVectorValue() to broadcast
+ // the lane-zero scalar value.
+ auto *Clone = State.Builder.Insert(GEP->clone());
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
+ State.set(this, EntryPart, Part);
+ State.addMetadata(EntryPart, GEP);
+ }
+ } else {
+ // If the GEP has at least one loop-varying operand, we are sure to
+ // produce a vector of pointers. But if we are only unrolling, we want
+ // to produce a scalar GEP for each unroll part. Thus, the GEP we
+ // produce with the code below will be scalar (if VF == 1) or vector
+ // (otherwise). Note that for the unroll-only case, we still maintain
+ // values in the vector mapping with initVector, as we do for other
+ // instructions.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // The pointer operand of the new GEP. If it's loop-invariant, we
+ // won't broadcast it.
+ auto *Ptr = IsPtrLoopInvariant
+ ? State.get(getOperand(0), VPIteration(0, 0))
+ : State.get(getOperand(0), Part);
+
+ // Collect all the indices for the new GEP. If any index is
+ // loop-invariant, we won't broadcast it.
+ SmallVector<Value *, 4> Indices;
+ for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
+ VPValue *Operand = getOperand(I);
+ if (IsIndexLoopInvariant[I - 1])
+ Indices.push_back(State.get(Operand, VPIteration(0, 0)));
+ else
+ Indices.push_back(State.get(Operand, Part));
+ }
+
+ // If the GEP instruction is vectorized and was in a basic block that
+ // needed predication, we can't propagate the poison-generating 'inbounds'
+ // flag. The control flow has been linearized and the GEP is no longer
+ // guarded by the predicate, which could make the 'inbounds' properties
+ // no longer hold.
+ bool IsInBounds =
+ GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
+
+ // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+ // but it should be a vector, otherwise.
+ auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
+ Indices, "", IsInBounds);
+ assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
+ "NewGEP is not a pointer vector");
+ State.set(this, NewGEP, Part);
+ State.addMetadata(NewGEP, GEP);
+ }
+ }
+}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-GEP ";
@@ -501,7 +758,48 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = getelementptr ";
printOperands(O, SlotTracker);
}
+#endif
+
+void VPBlendRecipe::execute(VPTransformState &State) {
+ State.setDebugLocFromInst(Phi);
+ // We know that all PHIs in non-header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ unsigned NumIncoming = getNumIncomingValues();
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // SELECT(Mask1, In1,
+ // In0)))
+ // Note that Mask0 is never used: lanes for which no path reaches this phi,
+ // and which are essentially undef, are taken from In0.
+ VectorParts Entry(State.UF);
+ for (unsigned In = 0; In < NumIncoming; ++In) {
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // We might have single edge PHIs (blocks) - use an identity
+ // 'select' for the first PHI operand.
+ Value *In0 = State.get(getIncomingValue(In), Part);
+ if (In == 0)
+ Entry[Part] = In0; // Initialize with the first incoming value.
+ else {
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Value *Cond = State.get(getMask(In), Part);
+ Entry[Part] =
+ State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
+ }
+ }
+ }
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ State.set(this, Entry[Part], Part);
+}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "BLEND ";
@@ -566,7 +864,35 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
if (AlsoPack)
O << " (S->V)";
}
+#endif
+
+void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
+ assert(State.Instance && "Branch on Mask works only on single instance.");
+ unsigned Part = State.Instance->Part;
+ unsigned Lane = State.Instance->Lane.getKnownLane();
+
+ Value *ConditionBit = nullptr;
+ VPValue *BlockInMask = getMask();
+ if (BlockInMask) {
+ ConditionBit = State.get(BlockInMask, Part);
+ if (ConditionBit->getType()->isVectorTy())
+ ConditionBit = State.Builder.CreateExtractElement(
+ ConditionBit, State.Builder.getInt32(Lane));
+ } else // Block in mask is all-one.
+ ConditionBit = State.Builder.getTrue();
+
+ // Replace the temporary unreachable terminator with a new conditional branch,
+ // whose two destinations will be set later when they are created.
+ auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
+ assert(isa<UnreachableInst>(CurrentTerminator) &&
+ "Expected to replace unreachable terminator with conditional branch.");
+ auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
+ CondBr->setSuccessor(0, nullptr);
+ ReplaceInstWithInst(CurrentTerminator, CondBr);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "PHI-PREDICATED-INSTRUCTION ";
@@ -838,3 +1164,28 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
}
#endif
+
+// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
+// remove VPActiveLaneMaskPHIRecipe.
+void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
+ Value *StartMask = State.get(getOperand(0), Part);
+ PHINode *EntryPart =
+ State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
+ EntryPart->addIncoming(StartMask, VectorPH);
+ EntryPart->setDebugLoc(DL);
+ State.set(this, EntryPart, Part);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "ACTIVE-LANE-MASK-PHI ";
+
+ printAsOperand(O, SlotTracker);
+ O << " = phi ";
+ printOperands(O, SlotTracker);
+}
+#endif
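A rough standalone model (not LLVM code; VF, UF and TC below are invented values) of what CanonicalIVIncrementForPart and the per-part active-lane-mask PHIs above compute when unrolling: part P starts at IV + P * VF, so part 0 reuses the IV unchanged, and part P's entry mask covers lanes [P * VF, P * VF + VF).

#include <cstdio>

int main() {
  const unsigned VF = 4, UF = 2;
  const long TC = 6, IV = 0;
  for (unsigned Part = 0; Part < UF; ++Part) {
    long Start = IV + (long)(Part * VF); // CanonicalIVIncrementForPart
    printf("part %u entry mask:", Part);
    for (unsigned Lane = 0; Lane < VF; ++Lane)
      printf(" %d", Start + Lane < TC); // get.active.lane.mask(Start, TC)
    printf("\n");
  }
  // part 0 entry mask: 1 1 1 1
  // part 1 entry mask: 1 1 0 0   (lanes 6 and 7 are >= TC)
  return 0;
}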
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 5fc676834331..c99fae1b2ab4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -103,6 +103,7 @@ public:
// Phi-like VPValues. Need to be kept together.
VPVBlendSC,
VPVCanonicalIVPHISC,
+ VPVActiveLaneMaskPHISC,
VPVFirstOrderRecurrencePHISC,
VPVWidenPHISC,
VPVWidenIntOrFpInductionSC,
@@ -358,6 +359,7 @@ public:
// Phi-like recipes. Need to be kept together.
VPBlendSC,
VPCanonicalIVPHISC,
+ VPActiveLaneMaskPHISC,
VPFirstOrderRecurrencePHISC,
VPWidenPHISC,
VPWidenIntOrFpInductionSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index f917883145c0..3501de6ab38e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -133,32 +133,48 @@ void VPlanVerifier::verifyHierarchicalCFG(
verifyRegionRec(TopRegion);
}
-bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
- auto Iter = depth_first(
- VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
- for (const VPBasicBlock *VPBB :
- VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
- // Verify that phi-like recipes are at the beginning of the block, with no
- // other recipes in between.
- auto RecipeI = VPBB->begin();
- auto End = VPBB->end();
- while (RecipeI != End && RecipeI->isPhi())
- RecipeI++;
+static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) {
+ // Verify that phi-like recipes are at the beginning of the block, with no
+ // other recipes in between.
+ auto RecipeI = VPBB->begin();
+ auto End = VPBB->end();
+ unsigned NumActiveLaneMaskPhiRecipes = 0;
+ while (RecipeI != End && RecipeI->isPhi()) {
+ if (isa<VPActiveLaneMaskPHIRecipe>(RecipeI))
+ NumActiveLaneMaskPhiRecipes++;
+ RecipeI++;
+ }
- while (RecipeI != End) {
- if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) {
- errs() << "Found phi-like recipe after non-phi recipe";
+ if (NumActiveLaneMaskPhiRecipes > 1) {
+ errs() << "There should be no more than one VPActiveLaneMaskPHIRecipe";
+ return false;
+ }
+
+ while (RecipeI != End) {
+ if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) {
+ errs() << "Found phi-like recipe after non-phi recipe";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- errs() << ": ";
- RecipeI->dump();
- errs() << "after\n";
- std::prev(RecipeI)->dump();
+ errs() << ": ";
+ RecipeI->dump();
+ errs() << "after\n";
+ std::prev(RecipeI)->dump();
#endif
- return false;
- }
- RecipeI++;
+ return false;
}
+ RecipeI++;
+ }
+
+ return true;
+}
+
+bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
+ auto Iter = depth_first(
+ VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
+ for (const VPBasicBlock *VPBB :
+ VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
+ if (!verifyVPBasicBlock(VPBB))
+ return false;
}
const VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
@@ -181,15 +197,16 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
}
if (Exiting->empty()) {
- errs() << "VPlan vector loop exiting block must end with BranchOnCount "
- "VPInstruction but is empty\n";
+ errs() << "VPlan vector loop exiting block must end with BranchOnCount or "
+ "BranchOnCond VPInstruction but is empty\n";
return false;
}
auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end()));
- if (!LastInst || LastInst->getOpcode() != VPInstruction::BranchOnCount) {
- errs() << "VPlan vector loop exit must end with BranchOnCount "
- "VPInstruction\n";
+ if (!LastInst || (LastInst->getOpcode() != VPInstruction::BranchOnCount &&
+ LastInst->getOpcode() != VPInstruction::BranchOnCond)) {
+ errs() << "VPlan vector loop exit must end with BranchOnCount or "
+ "BranchOnCond VPInstruction\n";
return false;
}
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 90598937affc..d12624ffb824 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -414,6 +414,10 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
unsigned NewIndex,
IRBuilder<> &Builder) {
+ // Shufflevectors can only be created for fixed-width vectors.
+ if (!isa<FixedVectorType>(ExtElt->getOperand(0)->getType()))
+ return nullptr;
+
// If the extract can be constant-folded, this code is unsimplified. Defer
// to other passes to handle that.
Value *X = ExtElt->getVectorOperand();
@@ -1249,14 +1253,20 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
VT != Op0->getType())
return false;
- auto *SVI0A = dyn_cast<ShuffleVectorInst>(Op0->getOperand(0));
- auto *SVI0B = dyn_cast<ShuffleVectorInst>(Op0->getOperand(1));
- auto *SVI1A = dyn_cast<ShuffleVectorInst>(Op1->getOperand(0));
- auto *SVI1B = dyn_cast<ShuffleVectorInst>(Op1->getOperand(1));
+ auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
+ auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
+ auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
+ auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
+ SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
auto checkSVNonOpUses = [&](Instruction *I) {
if (!I || I->getOperand(0)->getType() != VT)
return true;
- return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; });
+ return any_of(I->users(), [&](User *U) {
+ return U != Op0 && U != Op1 &&
+ !(isa<ShuffleVectorInst>(U) &&
+ (InputShuffles.contains(cast<Instruction>(U)) ||
+ isInstructionTriviallyDead(cast<Instruction>(U))));
+ });
};
if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
@@ -1271,6 +1281,9 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
auto *SV = dyn_cast<ShuffleVectorInst>(U);
if (!SV || SV->getType() != VT)
return false;
+ if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
+ (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
+ return false;
if (!llvm::is_contained(Shuffles, SV))
Shuffles.push_back(SV);
}
@@ -1283,13 +1296,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
if (FromReduction && Shuffles.size() > 1)
return false;
+ // Add any shuffle uses for the shuffles we have found, to include them in our
+ // cost calculations.
+ if (!FromReduction) {
+ for (ShuffleVectorInst *SV : Shuffles) {
+ for (auto U : SV->users()) {
+ ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
+ if (SSV && isa<UndefValue>(SSV->getOperand(1)))
+ Shuffles.push_back(SSV);
+ }
+ }
+ }
+
   // For each of the output shuffles, we try to sort all the first vector
   // elements to the beginning, followed by the second vector's elements at the
   // end. If the binops are legalized to smaller vectors, this may reduce the
   // total number of binops. We compute the ReconstructMask needed to convert
   // back to the original lane order.
- SmallVector<int> V1, V2;
- SmallVector<SmallVector<int>> ReconstructMasks;
+ SmallVector<std::pair<int, int>> V1, V2;
+ SmallVector<SmallVector<int>> OrigReconstructMasks;
int MaxV1Elt = 0, MaxV2Elt = 0;
unsigned NumElts = VT->getNumElements();
for (ShuffleVectorInst *SVN : Shuffles) {
@@ -1300,6 +1325,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
// case we need to commute the mask).
Value *SVOp0 = SVN->getOperand(0);
Value *SVOp1 = SVN->getOperand(1);
+ if (isa<UndefValue>(SVOp1)) {
+ auto *SSV = cast<ShuffleVectorInst>(SVOp0);
+ SVOp0 = SSV->getOperand(0);
+ SVOp1 = SSV->getOperand(1);
+ for (unsigned I = 0, E = Mask.size(); I != E; I++) {
+ if (Mask[I] >= static_cast<int>(SSV->getShuffleMask().size()))
+ return false;
+ Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]);
+ }
+ }
if (SVOp0 == Op1 && SVOp1 == Op0) {
std::swap(SVOp0, SVOp1);
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
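The added block above folds a shuffle of a single-source shuffle by composing masks: lane I of the outer shuffle reads lane Mask[I] of the inner shuffle SSV, which in turn reads lane SSV->getMaskValue(Mask[I]) of SSV's operands. A minimal standalone sketch of that composition (a hypothetical helper, not part of the patch):

    #include <cassert>
    #include <vector>
    // Compose an outer mask with the mask of its single-source inner
    // shuffle; undef (-1) lanes propagate unchanged. The patch bails out on
    // out-of-range lanes; here that check is an assert.
    std::vector<int> composeMasks(const std::vector<int> &Outer,
                                  const std::vector<int> &Inner) {
      std::vector<int> Folded;
      for (int M : Outer) {
        assert(M < static_cast<int>(Inner.size()) && "lane out of range");
        Folded.push_back(M < 0 ? -1 : Inner[M]);
      }
      return Folded;
    }
    // E.g. Outer {3, -1, 0} over Inner {4, 5, 6, 7} yields {7, -1, 4}.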
@@ -1316,21 +1351,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
ReconstructMask.push_back(-1);
} else if (Mask[I] < static_cast<int>(NumElts)) {
MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
- auto It = find(V1, Mask[I]);
+ auto It = find_if(V1, [&](const std::pair<int, int> &A) {
+ return Mask[I] == A.first;
+ });
if (It != V1.end())
ReconstructMask.push_back(It - V1.begin());
else {
ReconstructMask.push_back(V1.size());
- V1.push_back(Mask[I]);
+ V1.emplace_back(Mask[I], V1.size());
}
} else {
MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
- auto It = find(V2, Mask[I] - NumElts);
+ auto It = find_if(V2, [&](const std::pair<int, int> &A) {
+ return Mask[I] - static_cast<int>(NumElts) == A.first;
+ });
if (It != V2.end())
ReconstructMask.push_back(NumElts + It - V2.begin());
else {
ReconstructMask.push_back(NumElts + V2.size());
- V2.push_back(Mask[I] - NumElts);
+ V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
}
}
}
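V1 and V2 now carry (mask value, original slot) pairs instead of bare mask values, so the later re-sort can still map OrigReconstructMasks entries back to their lanes. A minimal standalone sketch of the dedup loop for one input, with hypothetical names:

    #include <algorithm>
    #include <utility>
    #include <vector>
    // Repeated mask values reuse their existing slot; new values append a
    // (mask value, slot index) pair and take the next slot.
    std::vector<int> dedupLanes(const std::vector<int> &Mask,
                                std::vector<std::pair<int, int>> &V1) {
      std::vector<int> Reconstruct;
      for (int M : Mask) {
        auto It = std::find_if(V1.begin(), V1.end(),
                               [M](const std::pair<int, int> &A) {
                                 return A.first == M;
                               });
        if (It != V1.end())
          Reconstruct.push_back(static_cast<int>(It - V1.begin()));
        else {
          Reconstruct.push_back(static_cast<int>(V1.size()));
          V1.emplace_back(M, static_cast<int>(V1.size()));
        }
      }
      return Reconstruct;
    }
    // E.g. Mask {4, 1, 4, 7} gives V1 {{4,0},{1,1},{7,2}} and {0, 1, 0, 2}.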
@@ -1339,7 +1378,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
// result. In-order can help simplify the shuffle away.
if (FromReduction)
sort(ReconstructMask);
- ReconstructMasks.push_back(ReconstructMask);
+ OrigReconstructMasks.push_back(std::move(ReconstructMask));
}
   // If the maximum elements used from V1 and V2 are not larger than the new
@@ -1351,16 +1390,68 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
MaxV2Elt == static_cast<int>(V2.size()) - 1))
return false;
+  // GetBaseMaskValue takes one of the inputs, which may either be a shuffle, a
+  // shuffle of another shuffle, or not a shuffle (which is treated as an
+  // identity shuffle).
+ auto GetBaseMaskValue = [&](Instruction *I, int M) {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return M;
+ if (isa<UndefValue>(SV->getOperand(1)))
+ if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
+ if (InputShuffles.contains(SSV))
+ return SSV->getMaskValue(SV->getMaskValue(M));
+ return SV->getMaskValue(M);
+ };
+
+  // Attempt to sort the inputs by ascending mask values to make simpler input
+  // shuffles and push complex shuffles down to the uses. We sort on the first
+  // of the two input shuffle orders, to try to get at least one input into a
+  // nice order.
+ auto SortBase = [&](Instruction *A, std::pair<int, int> X,
+ std::pair<int, int> Y) {
+ int MXA = GetBaseMaskValue(A, X.first);
+ int MYA = GetBaseMaskValue(A, Y.first);
+ return MXA < MYA;
+ };
+ stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
+ return SortBase(SVI0A, A, B);
+ });
+ stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
+ return SortBase(SVI1A, A, B);
+ });
+ // Calculate our ReconstructMasks from the OrigReconstructMasks and the
+ // modified order of the input shuffles.
+ SmallVector<SmallVector<int>> ReconstructMasks;
+ for (auto Mask : OrigReconstructMasks) {
+ SmallVector<int> ReconstructMask;
+ for (int M : Mask) {
+ auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
+ auto It = find_if(V, [M](auto A) { return A.second == M; });
+ assert(It != V.end() && "Expected all entries in Mask");
+ return std::distance(V.begin(), It);
+ };
+ if (M < 0)
+ ReconstructMask.push_back(-1);
+ else if (M < static_cast<int>(NumElts)) {
+ ReconstructMask.push_back(FindIndex(V1, M));
+ } else {
+ ReconstructMask.push_back(NumElts + FindIndex(V2, M));
+ }
+ }
+ ReconstructMasks.push_back(std::move(ReconstructMask));
+ }
+
// Calculate the masks needed for the new input shuffles, which get padded
   // with undef.
SmallVector<int> V1A, V1B, V2A, V2B;
for (unsigned I = 0; I < V1.size(); I++) {
- V1A.push_back(SVI0A->getMaskValue(V1[I]));
- V1B.push_back(SVI0B->getMaskValue(V1[I]));
+ V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
+ V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
}
for (unsigned I = 0; I < V2.size(); I++) {
- V2A.push_back(SVI1A->getMaskValue(V2[I]));
- V2B.push_back(SVI1B->getMaskValue(V2[I]));
+ V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
+ V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
}
while (V1A.size() < NumElts) {
V1A.push_back(UndefMaskElem);
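After the stable sort by GetBaseMaskValue, a pair's position within V1/V2 changes, but its .second member still records the slot assigned during dedup, which is what the OrigReconstructMasks entries refer to; FindIndex therefore searches on .second. A small hypothetical worked example of that remapping:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>
    int main() {
      // Suppose dedup produced V1 = {{0,0},{1,1},{2,2}} and the first input
      // shuffle's base mask values for lanes 0, 1, 2 are {6, 4, 5}; sorting
      // by those values reorders V1 to:
      std::vector<std::pair<int, int>> V1 = {{1, 1}, {2, 2}, {0, 0}};
      int M = 0; // an OrigReconstructMasks entry, i.e. an original slot
      auto It = std::find_if(V1.begin(), V1.end(),
                             [M](auto A) { return A.second == M; });
      assert(It - V1.begin() == 2 && "slot 0 now lives at position 2");
      return 0;
    }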
@@ -1371,9 +1462,14 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
V2B.push_back(UndefMaskElem);
}
- auto AddShuffleCost = [&](InstructionCost C, ShuffleVectorInst *SV) {
- return C +
- TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, SV->getShuffleMask());
+ auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return C;
+ return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
+ ? TTI::SK_PermuteSingleSrc
+ : TTI::SK_PermuteTwoSrc,
+ VT, SV->getShuffleMask());
};
auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask);
@@ -1386,9 +1482,6 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
InstructionCost(0), AddShuffleCost);
- // This set helps us only cost each unique shuffle once.
- SmallPtrSet<ShuffleVectorInst *, 4> InputShuffles(
- {SVI0A, SVI0B, SVI1A, SVI1B});
CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
InstructionCost(0), AddShuffleCost);
@@ -1408,22 +1501,35 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
InstructionCost(0), AddShuffleMaskCost);
+ LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
+ LLVM_DEBUG(dbgs() << " CostBefore: " << CostBefore
+ << " vs CostAfter: " << CostAfter << "\n");
if (CostBefore <= CostAfter)
return false;
// The cost model has passed, create the new instructions.
- Builder.SetInsertPoint(SVI0A);
- Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0),
- SVI0A->getOperand(1), V1A);
- Builder.SetInsertPoint(SVI0B);
- Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0),
- SVI0B->getOperand(1), V1B);
- Builder.SetInsertPoint(SVI1A);
- Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0),
- SVI1A->getOperand(1), V2A);
- Builder.SetInsertPoint(SVI1B);
- Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0),
- SVI1B->getOperand(1), V2B);
+ auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return I;
+ if (isa<UndefValue>(SV->getOperand(1)))
+ if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
+ if (InputShuffles.contains(SSV))
+ return SSV->getOperand(Op);
+ return SV->getOperand(Op);
+ };
+ Builder.SetInsertPoint(SVI0A->getNextNode());
+ Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
+ GetShuffleOperand(SVI0A, 1), V1A);
+ Builder.SetInsertPoint(SVI0B->getNextNode());
+ Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
+ GetShuffleOperand(SVI0B, 1), V1B);
+ Builder.SetInsertPoint(SVI1A->getNextNode());
+ Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
+ GetShuffleOperand(SVI1A, 1), V2A);
+ Builder.SetInsertPoint(SVI1B->getNextNode());
+ Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
+ GetShuffleOperand(SVI1B, 1), V2B);
Builder.SetInsertPoint(Op0);
Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
NSV0A, NSV0B);