Diffstat (limited to 'lib')
-rw-r--r-- lib/Analysis/AliasSetTracker.cpp | 4
-rw-r--r-- lib/Analysis/AssumptionCache.cpp | 11
-rw-r--r-- lib/Analysis/BasicAliasAnalysis.cpp | 19
-rw-r--r-- lib/Analysis/CFLSteensAliasAnalysis.cpp | 3
-rw-r--r-- lib/Analysis/DemandedBits.cpp | 2
-rw-r--r-- lib/Analysis/InlineCost.cpp | 16
-rw-r--r-- lib/Analysis/InstructionSimplify.cpp | 6
-rw-r--r-- lib/Analysis/LazyValueInfo.cpp | 30
-rw-r--r-- lib/Analysis/Loads.cpp | 10
-rw-r--r-- lib/Analysis/MemoryDependenceAnalysis.cpp | 6
-rw-r--r-- lib/Analysis/ScalarEvolution.cpp | 133
-rw-r--r-- lib/Analysis/ScalarEvolutionExpander.cpp | 48
-rw-r--r-- lib/Analysis/TypeBasedAliasAnalysis.cpp | 2
-rw-r--r-- lib/Analysis/ValueTracking.cpp | 7
-rw-r--r-- lib/BinaryFormat/Magic.cpp | 3
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 14
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.cpp | 6
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 68
-rw-r--r-- lib/CodeGen/CMakeLists.txt | 1
-rw-r--r-- lib/CodeGen/CodeGenPrepare.cpp | 54
-rw-r--r-- lib/CodeGen/GlobalISel/IRTranslator.cpp | 9
-rw-r--r-- lib/CodeGen/GlobalISel/InstructionSelector.cpp | 12
-rw-r--r-- lib/CodeGen/GlobalISel/Legalizer.cpp | 139
-rw-r--r-- lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 3
-rw-r--r-- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 34
-rw-r--r-- lib/CodeGen/GlobalISel/Utils.cpp | 28
-rw-r--r-- lib/CodeGen/IfConversion.cpp | 13
-rw-r--r-- lib/CodeGen/ImplicitNullChecks.cpp | 39
-rw-r--r-- lib/CodeGen/LiveDebugVariables.cpp | 2
-rw-r--r-- lib/CodeGen/LiveDebugVariables.h | 2
-rw-r--r-- lib/CodeGen/MachineBasicBlock.cpp | 6
-rw-r--r-- lib/CodeGen/MachineInstr.cpp | 18
-rw-r--r-- lib/CodeGen/MachineModuleInfoImpls.cpp | 1
-rw-r--r-- lib/CodeGen/MachineScheduler.cpp | 172
-rw-r--r-- lib/CodeGen/MacroFusion.cpp | 150
-rw-r--r-- lib/CodeGen/RegisterScavenging.cpp | 428
-rw-r--r-- lib/CodeGen/RegisterUsageInfo.cpp | 4
-rw-r--r-- lib/CodeGen/SelectionDAG/CMakeLists.txt | 1
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 256
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 29
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 78
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 33
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 95
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 15
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 24
-rw-r--r-- lib/CodeGen/TargetRegisterInfo.cpp | 20
-rw-r--r-- lib/CodeGen/TargetSubtargetInfo.cpp | 14
-rw-r--r-- lib/DebugInfo/CodeView/CMakeLists.txt | 3
-rw-r--r-- lib/DebugInfo/CodeView/CVTypeVisitor.cpp | 2
-rw-r--r-- lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp | 30
-rw-r--r-- lib/DebugInfo/CodeView/EnumTables.cpp | 10
-rw-r--r-- lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp | 157
-rw-r--r-- lib/DebugInfo/CodeView/SymbolDumper.cpp | 2
-rw-r--r-- lib/DebugInfo/CodeView/SymbolRecordMapping.cpp | 2
-rw-r--r-- lib/DebugInfo/CodeView/TypeDatabase.cpp | 146
-rw-r--r-- lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp | 330
-rw-r--r-- lib/DebugInfo/CodeView/TypeDumpVisitor.cpp | 2
-rw-r--r-- lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp | 82
-rw-r--r-- lib/DebugInfo/CodeView/TypeName.cpp | 243
-rw-r--r-- lib/DebugInfo/CodeView/TypeTableCollection.cpp | 58
-rw-r--r-- lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp | 80
-rw-r--r-- lib/DebugInfo/DWARF/DWARFContext.cpp | 15
-rw-r--r-- lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 36
-rw-r--r-- lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 2
-rw-r--r-- lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 52
-rw-r--r-- lib/DebugInfo/DWARF/DWARFFormValue.cpp | 113
-rw-r--r-- lib/DebugInfo/DWARF/DWARFUnit.cpp | 42
-rw-r--r-- lib/DebugInfo/DWARF/DWARFVerifier.cpp | 6
-rw-r--r-- lib/DebugInfo/MSF/MSFBuilder.cpp | 27
-rw-r--r-- lib/DebugInfo/MSF/MSFCommon.cpp | 6
-rw-r--r-- lib/DebugInfo/MSF/MappedBlockStream.cpp | 23
-rw-r--r-- lib/DebugInfo/PDB/DIA/DIASession.cpp | 2
-rw-r--r-- lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp | 8
-rw-r--r-- lib/DebugInfo/PDB/Native/DbiStream.cpp | 52
-rw-r--r-- lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 9
-rw-r--r-- lib/DebugInfo/PDB/Native/InfoStream.cpp | 8
-rw-r--r-- lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp | 36
-rw-r--r-- lib/DebugInfo/PDB/Native/NamedStreamMap.cpp | 24
-rw-r--r-- lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp | 9
-rw-r--r-- lib/DebugInfo/PDB/Native/NativeEnumModules.cpp | 2
-rw-r--r-- lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp | 9
-rw-r--r-- lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 8
-rw-r--r-- lib/DebugInfo/PDB/Native/NativeSession.cpp | 7
-rw-r--r-- lib/DebugInfo/PDB/Native/PDBFile.cpp | 8
-rw-r--r-- lib/DebugInfo/PDB/Native/TpiStream.cpp | 12
-rw-r--r-- lib/ExecutionEngine/Orc/OrcCBindings.cpp | 32
-rw-r--r-- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 82
-rw-r--r-- lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp | 5
-rw-r--r-- lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 119
-rw-r--r-- lib/IR/AsmWriter.cpp | 147
-rw-r--r-- lib/IR/AttributeImpl.h | 5
-rw-r--r-- lib/IR/Attributes.cpp | 33
-rw-r--r-- lib/IR/AutoUpgrade.cpp | 39
-rw-r--r-- lib/IR/BasicBlock.cpp | 13
-rw-r--r-- lib/IR/ConstantRange.cpp | 30
-rw-r--r-- lib/IR/Instructions.cpp | 25
-rw-r--r-- lib/IR/LLVMContextImpl.cpp | 30
-rw-r--r-- lib/IR/LLVMContextImpl.h | 148
-rw-r--r-- lib/IR/Metadata.cpp | 14
-rw-r--r-- lib/IR/Statepoint.cpp | 12
-rw-r--r-- lib/LTO/ThinLTOCodeGenerator.cpp | 1
-rw-r--r-- lib/MC/CMakeLists.txt | 2
-rw-r--r-- lib/MC/ELFObjectWriter.cpp | 3
-rw-r--r-- lib/MC/MCAssembler.cpp | 36
-rw-r--r-- lib/MC/MCFragment.cpp | 18
-rw-r--r-- lib/MC/MCSection.cpp | 2
-rw-r--r-- lib/MC/MCWasmStreamer.cpp | 12
-rw-r--r-- lib/MC/MCWinCOFFStreamer.cpp (renamed from lib/MC/WinCOFFStreamer.cpp) | 8
-rw-r--r-- lib/MC/WasmObjectWriter.cpp | 318
-rw-r--r-- lib/MC/WinCOFFObjectWriter.cpp | 24
-rw-r--r-- lib/Object/COFFObjectFile.cpp | 20
-rw-r--r-- lib/Object/IRSymtab.cpp | 36
-rw-r--r-- lib/Object/MachOObjectFile.cpp | 39
-rw-r--r-- lib/Object/WasmObjectFile.cpp | 93
-rw-r--r-- lib/Object/WindowsResource.cpp | 270
-rw-r--r-- lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 57
-rw-r--r-- lib/ObjectYAML/CodeViewYAMLTypes.cpp | 2
-rw-r--r-- lib/ObjectYAML/WasmYAML.cpp | 50
-rw-r--r-- lib/Option/OptTable.cpp | 34
-rw-r--r-- lib/Option/Option.cpp | 1
-rw-r--r-- lib/Passes/PassBuilder.cpp | 8
-rw-r--r-- lib/ProfileData/Coverage/CoverageMapping.cpp | 10
-rw-r--r-- lib/ProfileData/Coverage/CoverageMappingReader.cpp | 18
-rw-r--r-- lib/ProfileData/InstrProf.cpp | 12
-rw-r--r-- lib/ProfileData/InstrProfReader.cpp | 16
-rw-r--r-- lib/ProfileData/InstrProfWriter.cpp | 21
-rw-r--r-- lib/Support/APFloat.cpp | 67
-rw-r--r-- lib/Support/APInt.cpp | 9
-rw-r--r-- lib/Support/BinaryStreamReader.cpp | 6
-rw-r--r-- lib/Support/CachePruning.cpp | 55
-rw-r--r-- lib/Support/CommandLine.cpp | 11
-rw-r--r-- lib/Support/DataExtractor.cpp | 7
-rw-r--r-- lib/Support/GraphWriter.cpp | 19
-rw-r--r-- lib/Support/Triple.cpp | 2
-rw-r--r-- lib/Support/Unix/Host.inc | 6
-rw-r--r-- lib/Support/Unix/Memory.inc | 4
-rw-r--r-- lib/Support/Unix/Program.inc | 13
-rw-r--r-- lib/Support/Windows/Host.inc | 10
-rw-r--r-- lib/Support/YAMLParser.cpp | 58
-rw-r--r-- lib/Support/YAMLTraits.cpp | 50
-rw-r--r-- lib/Support/raw_ostream.cpp | 4
-rw-r--r-- lib/Target/AArch64/AArch64.h | 2
-rw-r--r-- lib/Target/AArch64/AArch64CondBrTuning.cpp | 336
-rw-r--r-- lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 47
-rw-r--r-- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 15
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.cpp | 11
-rw-r--r-- lib/Target/AArch64/AArch64InstrAtomics.td | 46
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.cpp | 6
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.h | 38
-rw-r--r-- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 20
-rw-r--r-- lib/Target/AArch64/AArch64MacroFusion.cpp | 183
-rw-r--r-- lib/Target/AArch64/AArch64MacroFusion.h | 11
-rw-r--r-- lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 4
-rw-r--r-- lib/Target/AArch64/AArch64SchedA57.td | 2
-rw-r--r-- lib/Target/AArch64/AArch64SchedFalkorDetails.td | 421
-rw-r--r-- lib/Target/AArch64/AArch64SchedKryoDetails.td | 6
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.cpp | 7
-rw-r--r-- lib/Target/AArch64/CMakeLists.txt | 1
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 70
-rw-r--r-- lib/Target/AMDGPU/AMDGPU.td | 47
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 23
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.h | 25
-rw-r--r-- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 11
-rw-r--r-- lib/Target/AMDGPU/AMDGPUInstrInfo.td | 10
-rw-r--r-- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10
-rw-r--r-- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 5
-rw-r--r-- lib/Target/AMDGPU/AMDGPUSubtarget.h | 25
-rw-r--r-- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8
-rw-r--r-- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 22
-rw-r--r-- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 6
-rw-r--r-- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 151
-rw-r--r-- lib/Target/AMDGPU/BUFInstructions.td | 411
-rw-r--r-- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 100
-rw-r--r-- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 10
-rw-r--r-- lib/Target/AMDGPU/FLATInstructions.td | 70
-rw-r--r-- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 34
-rw-r--r-- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 8
-rw-r--r-- lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 48
-rw-r--r-- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h | 12
-rw-r--r-- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 64
-rw-r--r-- lib/Target/AMDGPU/Processors.td | 4
-rw-r--r-- lib/Target/AMDGPU/R600ISelLowering.cpp | 24
-rw-r--r-- lib/Target/AMDGPU/SIDefines.h | 6
-rw-r--r-- lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 30
-rw-r--r-- lib/Target/AMDGPU/SIFoldOperands.cpp | 12
-rw-r--r-- lib/Target/AMDGPU/SIFrameLowering.cpp | 83
-rw-r--r-- lib/Target/AMDGPU/SIFrameLowering.h | 4
-rw-r--r-- lib/Target/AMDGPU/SIISelLowering.cpp | 261
-rw-r--r-- lib/Target/AMDGPU/SIISelLowering.h | 5
-rw-r--r-- lib/Target/AMDGPU/SIInstrInfo.cpp | 71
-rw-r--r-- lib/Target/AMDGPU/SIInstrInfo.h | 3
-rw-r--r-- lib/Target/AMDGPU/SIInstrInfo.td | 195
-rw-r--r-- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 11
-rw-r--r-- lib/Target/AMDGPU/SIMachineFunctionInfo.h | 14
-rw-r--r-- lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 100
-rw-r--r-- lib/Target/AMDGPU/SIRegisterInfo.cpp | 40
-rw-r--r-- lib/Target/AMDGPU/SIRegisterInfo.h | 7
-rw-r--r-- lib/Target/AMDGPU/SIShrinkInstructions.cpp | 2
-rw-r--r-- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 21
-rw-r--r-- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3
-rw-r--r-- lib/Target/AMDGPU/VOP1Instructions.td | 16
-rw-r--r-- lib/Target/AMDGPU/VOP2Instructions.td | 68
-rw-r--r-- lib/Target/AMDGPU/VOPCInstructions.td | 28
-rw-r--r-- lib/Target/AMDGPU/VOPInstructions.td | 88
-rw-r--r-- lib/Target/ARM/ARM.td | 3
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.cpp | 3
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 8
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 54
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td | 10
-rw-r--r-- lib/Target/ARM/ARMInstructionSelector.cpp | 105
-rw-r--r-- lib/Target/ARM/ARMLegalizerInfo.cpp | 6
-rw-r--r-- lib/Target/ARM/ARMMacroFusion.cpp | 57
-rw-r--r-- lib/Target/ARM/ARMMacroFusion.h | 24
-rw-r--r-- lib/Target/ARM/ARMRegisterBankInfo.cpp | 10
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h | 8
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp | 7
-rw-r--r-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 66
-rw-r--r-- lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 63
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 14
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 7
-rw-r--r-- lib/Target/BPF/BPFISelDAGToDAG.cpp | 30
-rw-r--r-- lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 12
-rw-r--r-- lib/Target/Hexagon/HexagonExpandCondsets.cpp | 15
-rw-r--r-- lib/Target/Hexagon/HexagonFrameLowering.cpp | 61
-rw-r--r-- lib/Target/Hexagon/HexagonGenMux.cpp | 86
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.cpp | 10
-rw-r--r-- lib/Target/Hexagon/HexagonNewValueJump.cpp | 2
-rw-r--r-- lib/Target/Hexagon/HexagonPeephole.cpp | 54
-rw-r--r-- lib/Target/Hexagon/HexagonSubtarget.cpp | 32
-rw-r--r-- lib/Target/Hexagon/HexagonSubtarget.h | 22
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.cpp | 1
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 18
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp | 53
-rw-r--r-- lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp | 12
-rw-r--r-- lib/Target/MSP430/MSP430TargetMachine.cpp | 8
-rw-r--r-- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 24
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 8
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 5
-rw-r--r-- lib/Target/Mips/Mips32r6InstrInfo.td | 6
-rw-r--r-- lib/Target/Mips/MipsISelLowering.cpp | 5
-rw-r--r-- lib/Target/Mips/MipsLongBranch.cpp | 12
-rw-r--r-- lib/Target/Mips/MipsSEISelLowering.cpp | 18
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 11
-rw-r--r-- lib/Target/PowerPC/PPC.h | 1
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.cpp | 2
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp | 11
-rw-r--r-- lib/Target/PowerPC/PPCInstr64Bit.td | 8
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp | 23
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp | 15
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.cpp | 72
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.h | 25
-rw-r--r-- lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 12
-rw-r--r-- lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 12
-rw-r--r-- lib/Target/Sparc/SparcTargetObjectFile.cpp | 6
-rw-r--r-- lib/Target/Sparc/SparcTargetObjectFile.h | 2
-rw-r--r-- lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 15
-rw-r--r-- lib/Target/SystemZ/SystemZFrameLowering.cpp | 17
-rw-r--r-- lib/Target/SystemZ/SystemZISelLowering.cpp | 15
-rw-r--r-- lib/Target/SystemZ/SystemZISelLowering.h | 6
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.td | 11
-rw-r--r-- lib/Target/SystemZ/SystemZMachineScheduler.cpp | 2
-rw-r--r-- lib/Target/SystemZ/SystemZMachineScheduler.h | 2
-rw-r--r-- lib/Target/SystemZ/SystemZOperators.td | 2
-rw-r--r-- lib/Target/SystemZ/SystemZTargetMachine.h | 2
-rw-r--r-- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp | 30
-rw-r--r-- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 18
-rw-r--r-- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp | 2
-rw-r--r-- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h | 3
-rw-r--r-- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp | 2
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 7
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 20
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp | 29
-rw-r--r-- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 32
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmParser.cpp | 3
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 8
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 3
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 17
-rw-r--r-- lib/Target/X86/X86FrameLowering.cpp | 37
-rw-r--r-- lib/Target/X86/X86ISelDAGToDAG.cpp | 28
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 190
-rw-r--r-- lib/Target/X86/X86ISelLowering.h | 26
-rw-r--r-- lib/Target/X86/X86InstrAVX512.td | 26
-rw-r--r-- lib/Target/X86/X86InstrFragmentsSIMD.td | 24
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 2
-rw-r--r-- lib/Target/X86/X86InstructionSelector.cpp | 300
-rw-r--r-- lib/Target/X86/X86InterleavedAccess.cpp | 125
-rw-r--r-- lib/Target/X86/X86IntrinsicsInfo.h | 24
-rw-r--r-- lib/Target/X86/X86LegalizerInfo.cpp | 32
-rw-r--r-- lib/Target/X86/X86MacroFusion.cpp | 101
-rw-r--r-- lib/Target/X86/X86MacroFusion.h | 11
-rw-r--r-- lib/Target/X86/X86TargetMachine.cpp | 2
-rw-r--r-- lib/Target/X86/X86TargetObjectFile.cpp | 6
-rw-r--r-- lib/Target/X86/X86TargetObjectFile.h | 5
-rw-r--r-- lib/Target/X86/X86TargetTransformInfo.cpp | 129
-rw-r--r-- lib/Target/X86/X86TargetTransformInfo.h | 7
-rw-r--r-- lib/Testing/Support/LLVMBuild.txt | 1
-rw-r--r-- lib/ToolDrivers/llvm-lib/LibDriver.cpp | 10
-rw-r--r-- lib/Transforms/IPO/PassManagerBuilder.cpp | 13
-rw-r--r-- lib/Transforms/IPO/SampleProfile.cpp | 3
-rw-r--r-- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 27
-rw-r--r-- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 126
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCalls.cpp | 46
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCasts.cpp | 12
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCompares.cpp | 99
-rw-r--r-- lib/Transforms/InstCombine/InstCombineInternal.h | 27
-rw-r--r-- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 43
-rw-r--r-- lib/Transforms/InstCombine/InstCombineSelect.cpp | 63
-rw-r--r-- lib/Transforms/InstCombine/InstCombineShifts.cpp | 3
-rw-r--r-- lib/Transforms/InstCombine/InstructionCombining.cpp | 14
-rw-r--r-- lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 7
-rw-r--r-- lib/Transforms/Instrumentation/InstrProfiling.cpp | 230
-rw-r--r-- lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/CMakeLists.txt | 1
-rw-r--r-- lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 5
-rw-r--r-- lib/Transforms/Scalar/GVN.cpp | 188
-rw-r--r-- lib/Transforms/Scalar/JumpThreading.cpp | 100
-rw-r--r-- lib/Transforms/Scalar/LoadCombine.cpp | 295
-rw-r--r-- lib/Transforms/Scalar/LoopDeletion.cpp | 53
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/NewGVN.cpp | 50
-rw-r--r-- lib/Transforms/Scalar/Reassociate.cpp | 110
-rw-r--r-- lib/Transforms/Scalar/SCCP.cpp | 15
-rw-r--r-- lib/Transforms/Scalar/SROA.cpp | 15
-rw-r--r-- lib/Transforms/Scalar/Scalar.cpp | 1
-rw-r--r-- lib/Transforms/Scalar/TailRecursionElimination.cpp | 52
-rw-r--r-- lib/Transforms/Utils/BuildLibCalls.cpp | 1
-rw-r--r-- lib/Transforms/Utils/Local.cpp | 54
-rw-r--r-- lib/Transforms/Utils/LoopSimplify.cpp | 83
-rw-r--r-- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 46
-rw-r--r-- lib/Transforms/Utils/LoopUtils.cpp | 68
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp | 47
-rw-r--r-- lib/Transforms/Vectorize/SLPVectorizer.cpp | 6
336 files changed, 8803 insertions, 4902 deletions
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index ee17ad3ba5863..4dfa25490d00d 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -218,8 +218,8 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
- if (auto *Inst = getUnknownInst(i)) {
- ImmutableCallSite C1(Inst), C2(Inst);
+ if (auto *UnknownInst = getUnknownInst(i)) {
+ ImmutableCallSite C1(UnknownInst), C2(Inst);
if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
AA.getModRefInfo(C2, C1) != MRI_NoModRef)
return true;
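
Note on the hunk above: the defect was plain C++ name shadowing. The inner `Inst` hid the function's `Inst` parameter, so both `C1` and `C2` wrapped the same instruction; the rename restores the intended pairing. A minimal standalone analog of the pitfall (names here are illustrative, not from the patch):

#include <cassert>

// Shadowing analog: the declaration in the if-condition hides the parameter,
// so the body sees 42 regardless of what the caller passed in.
static int pick(int Inst) {
  if (int Inst = 42)  // shadows the parameter; -Wshadow flags this
    return Inst;      // always returns 42
  return Inst;        // dead: the condition above is always nonzero
}

int main() { assert(pick(7) == 42); }
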
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index 0468c794e81dd..3ff27890dc385 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -84,18 +84,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
Value *B;
ConstantInt *C;
// (A & B) or (A | B) or (A ^ B).
- if (match(V,
- m_CombineOr(m_And(m_Value(A), m_Value(B)),
- m_CombineOr(m_Or(m_Value(A), m_Value(B)),
- m_Xor(m_Value(A), m_Value(B)))))) {
+ if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) {
AddAffected(A);
AddAffected(B);
// (A << C) or (A >>_s C) or (A >>_u C) where C is some constant.
- } else if (match(V,
- m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)),
- m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)),
- m_AShr(m_Value(A),
- m_ConstantInt(C)))))) {
+ } else if (match(V, m_Shift(m_Value(A), m_ConstantInt(C)))) {
AddAffected(A);
}
};
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index dbb1b01b94ac2..b52a1d7b24d62 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1021,11 +1021,14 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// asking about values from different loop iterations. See PR32314.
// TODO: We may be able to change the check so we only do this when
// we definitely looked through a PHINode.
- KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL);
- KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL);
- if (Known1.Zero.intersects(Known2.One) ||
- Known1.One.intersects(Known2.Zero))
- return NoAlias;
+ if (GEP1LastIdx != GEP2LastIdx &&
+ GEP1LastIdx->getType() == GEP2LastIdx->getType()) {
+ KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL);
+ KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL);
+ if (Known1.Zero.intersects(Known2.One) ||
+ Known1.One.intersects(Known2.Zero))
+ return NoAlias;
+ }
} else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL))
return NoAlias;
}
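
The known-bits disjointness test guarded by the new type check can be read in isolation: two values cannot be equal if one has a bit known zero where the other has that bit known one. A minimal sketch against llvm/Support/KnownBits.h (the helper name is mine, not LLVM's):

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Returns true when the two known-bit summaries are incompatible, i.e. the
// underlying values provably differ in at least one bit position.
static bool provablyUnequal(const KnownBits &K1, const KnownBits &K2) {
  return K1.Zero.intersects(K2.One) || K1.One.intersects(K2.Zero);
}
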
@@ -1345,11 +1348,7 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// Statically, we can see that the base objects are the same, but the
// pointers have dynamic offsets which we can't resolve. And none of our
// little tricks above worked.
- //
- // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
- // practical effect of this is protecting TBAA in the case of dynamic
- // indices into arrays of unions or malloc'd memory.
- return PartialAlias;
+ return MayAlias;
}
static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index dde24ef5fdd57..6e4263920e586 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -80,9 +80,6 @@ public:
const AliasSummary &getAliasSummary() const { return Summary; }
};
-/// Try to go from a Value* to a Function*. Never returns nullptr.
-static Optional<Function *> parentFunctionOfValue(Value *);
-
const StratifiedIndex StratifiedLink::SetSentinel =
std::numeric_limits<StratifiedIndex>::max();
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 8f808f3e78719..926b28d6094a5 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -107,6 +107,8 @@ void DemandedBits::determineLiveOperandBits(
AB = AOut.byteSwap();
break;
case Intrinsic::bitreverse:
+ // The alive bits of the input are the reversed alive bits of
+ // the output.
AB = AOut.reverseBits();
break;
case Intrinsic::ctlz:
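
A quick illustration of the new comment, using APInt directly (a standalone sketch, not part of the patch): if a consumer only demands the low byte of bitreverse(x), only the high byte of x is alive.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

void bitreverseDemandDemo() {
  APInt AOut(32, 0xFF);          // consumer uses bits 0..7 of the result
  APInt AB = AOut.reverseBits(); // so input bits 24..31 are alive
  assert(AB == APInt(32, 0xFF000000));
}
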
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 6ff5938a3175a..77ad6f1e166fd 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -1022,12 +1022,15 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// inlining those. It will prevent inlining in cases where the optimization
// does not (yet) fire.
+ // Maximum valid cost increased in this function.
+ int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
+
// Exit early for a large switch, assuming one case needs at least one
// instruction.
// FIXME: This is not true for a bit test, but ignore such case for now to
// save compile-time.
int64_t CostLowerBound =
- std::min((int64_t)INT_MAX,
+ std::min((int64_t)CostUpperBound,
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
if (CostLowerBound > Threshold) {
@@ -1044,7 +1047,8 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
if (JumpTableSize) {
int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
4 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
+
+ Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
return false;
}
@@ -1068,10 +1072,12 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
return false;
}
- int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
- uint64_t SwitchCost =
+
+ int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
+ int64_t SwitchCost =
ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
- Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
+
+ Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
return false;
}
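
The clamped arithmetic in this hunk is easier to see outside the class. A sketch with plain integers; InlineConstants::InstrCost is taken as a parameter here to keep the sketch self-contained (its value in this codebase is 5, but that is incidental):

#include <algorithm>
#include <climits>
#include <cstdint>

// All math is done in int64_t and clamped to CostUpperBound so that a later
// "Cost + InstrCost" style addition cannot overflow INT_MAX.
int64_t clampedSwitchCost(int64_t NumCaseCluster, int64_t Cost,
                          int64_t InstrCost) {
  const int64_t CostUpperBound = INT_MAX - InstrCost - 1;
  int64_t ExpectedNumberOfCompare = 3 * NumCaseCluster / 2 - 1;
  int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;
  return std::min(CostUpperBound, SwitchCost + Cost);
}
// E.g. NumCaseCluster = 8 gives 3*8/2 - 1 = 11 expected compares.
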
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index a975be79619b7..d9e32a3c417e0 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2688,16 +2688,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
}
// icmp pred (and X, Y), X
- if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)),
- m_And(m_Specific(RHS), m_Value())))) {
+ if (LBO && match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) {
if (Pred == ICmpInst::ICMP_UGT)
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_ULE)
return getTrue(ITy);
}
// icmp pred X, (and X, Y)
- if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)),
- m_And(m_Specific(LHS), m_Value())))) {
+ if (RBO && match(RBO, m_c_And(m_Value(), m_Specific(LHS)))) {
if (Pred == ICmpInst::ICMP_UGE)
return getTrue(ITy);
if (Pred == ICmpInst::ICMP_ULT)
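
Both folds above rest on the identity (X & Y) <=u X for all X and Y, since masking can only clear bits; the m_c_And matcher merely collapses the two operand orders into one pattern. A one-line check of the identity:

#include <cassert>
#include <cstdint>

void andNeverExceeds(uint32_t X, uint32_t Y) {
  assert((X & Y) <= X); // hence "(X & Y) >u X" is false, "(X & Y) <=u X" true
}
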
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 3ed61a79478ad..102081e721ac6 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -1324,12 +1324,12 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
return getValueFromICmpCondition(Val, ICI, isTrueDest);
// Handle conditions in the form of (cond1 && cond2), we know that on the
- // true dest path both of the conditions hold.
- if (!isTrueDest)
- return LVILatticeVal::getOverdefined();
-
+ // true dest path both of the conditions hold. Similarly for conditions of
+ // the form (cond1 || cond2), we know that on the false dest path neither
+ // condition holds.
BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond);
- if (!BO || BO->getOpcode() != BinaryOperator::And)
+ if (!BO || (isTrueDest && BO->getOpcode() != BinaryOperator::And) ||
+ (!isTrueDest && BO->getOpcode() != BinaryOperator::Or))
return LVILatticeVal::getOverdefined();
auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited);
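
The generalization in this hunk is the Boolean dual of the existing And case. A minimal demonstration of the two facts the code relies on:

#include <cassert>

void branchFacts(bool C1, bool C2) {
  if (C1 && C2) {    // on the true edge of an And, both conditions hold
    assert(C1 && C2);
  }
  if (!(C1 || C2)) { // on the false edge of an Or, neither condition holds
    assert(!C1 && !C2);
  }
}
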
@@ -1660,6 +1660,26 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
return nullptr;
}
+ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
+ BasicBlock *FromBB,
+ BasicBlock *ToBB,
+ Instruction *CxtI) {
+ unsigned Width = V->getType()->getIntegerBitWidth();
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
+ LVILatticeVal Result =
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
+ if (Result.isUndefined())
+ return ConstantRange(Width, /*isFullSet=*/false);
+ if (Result.isConstantRange())
+ return Result.getConstantRange();
+ // We represent ConstantInt constants as constant ranges but other kinds
+ // of integer constants, i.e. ConstantExpr will be tagged as constants
+ assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
+ "ConstantInt value must be represented as constantrange");
+ return ConstantRange(Width, /*isFullSet=*/true);
+}
+
static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
const LVILatticeVal &Val,
const DataLayout &DL,
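
A hypothetical caller of the new getConstantRangeOnEdge entry point might look like the sketch below; the helper name is mine, and it assumes the declaration defaults CxtI to nullptr, as getConstantOnEdge does.

#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Ask LVI for the range V must lie in when control flows FromBB -> ToBB; the
// true edge of "icmp ult %v, 10" should yield an unsigned max below 10.
static bool knownBelowTen(LazyValueInfo &LVI, Value *V, BasicBlock *FromBB,
                          BasicBlock *ToBB) {
  ConstantRange CR = LVI.getConstantRangeOnEdge(V, FromBB, ToBB);
  return CR.getUnsignedMax().ult(10);
}
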
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 96799a459bfc4..591b0fc481d24 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -117,6 +117,16 @@ static bool isDereferenceableAndAlignedPointer(
}
bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+ const APInt &Size,
+ const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT) {
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT,
+ Visited);
+}
+
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3fdedbb0ab3c2..263cf42ebe271 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -310,11 +310,11 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
}
static bool isVolatile(Instruction *Inst) {
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (auto *LI = dyn_cast<LoadInst>(Inst))
return LI->isVolatile();
- else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ if (auto *SI = dyn_cast<StoreInst>(Inst))
return SI->isVolatile();
- else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
return AI->isVolatile();
return false;
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index aebc80a0a8851..73a95ec405c7b 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -126,11 +126,11 @@ static cl::opt<bool>
static cl::opt<unsigned> MulOpsInlineThreshold(
"scev-mulops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
- cl::init(1000));
+ cl::init(32));
static cl::opt<unsigned> AddOpsInlineThreshold(
"scev-addops-inline-threshold", cl::Hidden,
- cl::desc("Threshold for inlining multiplication operands into a SCEV"),
+ cl::desc("Threshold for inlining addition operands into a SCEV"),
cl::init(500));
static cl::opt<unsigned> MaxSCEVCompareDepth(
@@ -1259,12 +1259,12 @@ static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
if (SE->isKnownPositive(Step)) {
*Pred = ICmpInst::ICMP_SLT;
return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMax());
+ SE->getSignedRangeMax(Step));
}
if (SE->isKnownNegative(Step)) {
*Pred = ICmpInst::ICMP_SGT;
return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMin());
+ SE->getSignedRangeMin(Step));
}
return nullptr;
}
@@ -1279,7 +1279,7 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
*Pred = ICmpInst::ICMP_ULT;
return SE->getConstant(APInt::getMinValue(BitWidth) -
- SE->getUnsignedRange(Step).getUnsignedMax());
+ SE->getUnsignedRangeMax(Step));
}
namespace {
@@ -1670,7 +1670,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// is safe.
if (isKnownPositive(Step)) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
- getUnsignedRange(Step).getUnsignedMax());
+ getUnsignedRangeMax(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
@@ -1686,7 +1686,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
- getSignedRange(Step).getSignedMin());
+ getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
@@ -3745,7 +3745,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
// makes it so that we cannot make much use of NUW.
auto AddFlags = SCEV::FlagAnyWrap;
const bool RHSIsNotMinSigned =
- !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+ !getSignedRangeMin(RHS).isMinSignedValue();
if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
// Let M be the minimum representable signed value. Then (-1)*RHS
// signed-wraps if and only if RHS is M. That can happen even for
@@ -4758,9 +4758,9 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
/// Determine the range for a particular SCEV. If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
-ConstantRange
-ScalarEvolution::getRange(const SCEV *S,
- ScalarEvolution::RangeSignHint SignHint) {
+const ConstantRange &
+ScalarEvolution::getRangeRef(const SCEV *S,
+ ScalarEvolution::RangeSignHint SignHint) {
DenseMap<const SCEV *, ConstantRange> &Cache =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
: SignedRanges;
@@ -4791,54 +4791,54 @@ ScalarEvolution::getRange(const SCEV *S,
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getRange(Add->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getRange(Add->getOperand(i), SignHint));
+ X = X.add(getRangeRef(Add->getOperand(i), SignHint));
return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getRange(Mul->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getRange(Mul->getOperand(i), SignHint));
+ X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getRange(SMax->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getRange(SMax->getOperand(i), SignHint));
+ X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getRange(UMax->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getRange(UMax->getOperand(i), SignHint));
+ X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getRange(UDiv->getLHS(), SignHint);
- ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
+ ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
+ ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
return setRange(UDiv, SignHint,
ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getRange(ZExt->getOperand(), SignHint);
+ ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
return setRange(ZExt, SignHint,
ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getRange(SExt->getOperand(), SignHint);
+ ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
return setRange(SExt, SignHint,
ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getRange(Trunc->getOperand(), SignHint);
+ ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
return setRange(Trunc, SignHint,
ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
@@ -5005,8 +5005,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
"Precondition!");
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- APInt MaxBECountValue = MaxBECountRange.getUnsignedMax();
+ APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);
// First, consider step signed.
ConstantRange StartSRange = getSignedRange(Start);
@@ -5023,7 +5022,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
// Next, consider step unsigned.
ConstantRange UR = getRangeForAffineARHelper(
- getUnsignedRange(Step).getUnsignedMax(), getUnsignedRange(Start),
+ getUnsignedRangeMax(Step), getUnsignedRange(Start),
MaxBECountValue, BitWidth, /* Signed = */ false);
// Finally, intersect signed and unsigned ranges.
@@ -6373,7 +6372,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
// to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
- MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax());
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, false,
{&EL0.Predicates, &EL1.Predicates});
@@ -7647,7 +7646,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
- APInt MaxBECount = getUnsignedRange(Distance).getUnsignedMax();
+ APInt MaxBECount = getUnsignedRangeMax(Distance);
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
// we end up with a loop whose backedge-taken count is n - 1. Detect this
@@ -7680,7 +7679,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
const SCEV *Max =
Exact == getCouldNotCompute()
? Exact
- : getConstant(getUnsignedRange(Exact).getUnsignedMax());
+ : getConstant(getUnsignedRangeMax(Exact));
return ExitLimit(Exact, Max, false, Predicates);
}
@@ -7689,7 +7688,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
getNegativeSCEV(Start), *this);
const SCEV *M = E == getCouldNotCompute()
? E
- : getConstant(getUnsignedRange(E).getUnsignedMax());
+ : getConstant(getUnsignedRangeMax(E));
return ExitLimit(E, M, false, Predicates);
}
@@ -7886,12 +7885,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// adding or subtracting 1 from one of the operands.
switch (Pred) {
case ICmpInst::ICMP_SLE:
- if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
- } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ } else if (!getSignedRangeMin(LHS).isMinSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
@@ -7899,12 +7898,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_SGE:
- if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ if (!getSignedRangeMin(RHS).isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
- } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
@@ -7912,23 +7911,23 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_ULE:
- if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ if (!getUnsignedRangeMax(RHS).isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
- } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ } else if (!getUnsignedRangeMin(LHS).isMinValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
- if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ if (!getUnsignedRangeMin(RHS).isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
- } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ } else if (!getUnsignedRangeMax(LHS).isMaxValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_UGT;
@@ -7962,19 +7961,19 @@ trivially_false:
}
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
- return getSignedRange(S).getSignedMax().isNegative();
+ return getSignedRangeMax(S).isNegative();
}
bool ScalarEvolution::isKnownPositive(const SCEV *S) {
- return getSignedRange(S).getSignedMin().isStrictlyPositive();
+ return getSignedRangeMin(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
- return !getSignedRange(S).getSignedMin().isNegative();
+ return !getSignedRangeMin(S).isNegative();
}
bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
- return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+ return !getSignedRangeMax(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
@@ -8560,7 +8559,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
// predicate we're interested in folding.
APInt Min = ICmpInst::isSigned(Pred) ?
- getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
+ getSignedRangeMin(V) : getUnsignedRangeMin(V);
if (Min == C->getAPInt()) {
// Given (V >= Min && V != Min) we conclude V >= (Min + 1).
@@ -9115,19 +9114,17 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
- APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxRHS = getSignedRangeMax(RHS);
APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
- APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
- .getSignedMax();
+ APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
}
- APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxRHS = getUnsignedRangeMax(RHS);
APInt MaxValue = APInt::getMaxValue(BitWidth);
- APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
- .getUnsignedMax();
+ APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
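
Worth noting how the test itself dodges overflow: rather than computing MaxRHS + MaxStrideMinusOne, which could wrap, it compares against MaxValue - MaxStrideMinusOne. The same trick with a plain unsigned integer standing in for APInt:

#include <cstdint>

// True when MaxRHS + MaxStrideMinusOne would exceed MaxValue; the subtraction
// form cannot wrap because MaxStrideMinusOne <= MaxValue.
bool additionWouldOverflow(uint64_t MaxRHS, uint64_t MaxStrideMinusOne,
                           uint64_t MaxValue) {
  return MaxValue - MaxStrideMinusOne < MaxRHS;
}
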
@@ -9141,19 +9138,17 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
- APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinRHS = getSignedRangeMin(RHS);
APInt MinValue = APInt::getSignedMinValue(BitWidth);
- APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
- .getSignedMax();
+ APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
}
- APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinRHS = getUnsignedRangeMin(RHS);
APInt MinValue = APInt::getMinValue(BitWidth);
- APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
- .getUnsignedMax();
+ APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
@@ -9292,8 +9287,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
} else {
// Calculate the maximum backedge count based on the range of values
// permitted by Start, End, and Stride.
- APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
- : getUnsignedRange(Start).getUnsignedMin();
+ APInt MinStart = IsSigned ? getSignedRangeMin(Start)
+ : getUnsignedRangeMin(Start);
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
@@ -9301,8 +9296,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (PositiveStride)
StrideForMaxBECount =
- IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
+ IsSigned ? getSignedRangeMin(Stride)
+ : getUnsignedRangeMin(Stride);
else
// Using a stride of 1 is safe when computing max backedge taken count for
// a loop with unknown stride.
@@ -9316,8 +9311,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// the case End = RHS. This is safe because in the other case (End - Start)
// is zero, leading to a zero maximum backedge taken count.
APInt MaxEnd =
- IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
- : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+ IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit)
+ : APIntOps::umin(getUnsignedRangeMax(RHS), Limit);
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
getConstant(StrideForMaxBECount), false);
@@ -9325,7 +9320,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
- MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax());
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
@@ -9376,11 +9371,11 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
- APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
- : getUnsignedRange(Start).getUnsignedMax();
+ APInt MaxStart = IsSigned ? getSignedRangeMax(Start)
+ : getUnsignedRangeMax(Start);
- APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
+ APInt MinStride = IsSigned ? getSignedRangeMin(Stride)
+ : getUnsignedRangeMin(Stride);
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
@@ -9390,8 +9385,8 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// the case End = RHS. This is safe because in the other case (Start - End)
// is zero, leading to a zero maximum backedge taken count.
APInt MinEnd =
- IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
- : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+ IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
+ : APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
const SCEV *MaxBECount = getCouldNotCompute();
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index f9b9df2bc707d..47bdac00ae1f3 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -748,18 +748,56 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
Value *Prod = nullptr;
- for (const auto &I : OpsAndLoops) {
- const SCEV *Op = I.second;
+ auto I = OpsAndLoops.begin();
+
+ // Expand the calculation of X pow N in the following manner:
+ // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
+ // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
+ const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
+ auto E = I;
+ // Calculate how many times the same operand from the same loop is included
+ // into this power.
+ uint64_t Exponent = 0;
+ const uint64_t MaxExponent = UINT64_MAX >> 1;
+ // No one sane will ever try to calculate such huge exponents, but if we
+ // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
+ // below when the power of 2 exceeds our Exponent, and we want it to be
+ // 1u << 31 at most to not deal with unsigned overflow.
+ while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
+ ++Exponent;
+ ++E;
+ }
+ assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
+
+ // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
+ // that are needed into the result.
+ Value *P = expandCodeFor(I->second, Ty);
+ Value *Result = nullptr;
+ if (Exponent & 1)
+ Result = P;
+ for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
+ P = InsertBinop(Instruction::Mul, P, P);
+ if (Exponent & BinExp)
+ Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
+ }
+
+ I = E;
+ assert(Result && "Nothing was expanded?");
+ return Result;
+ };
+
+ while (I != OpsAndLoops.end()) {
if (!Prod) {
// This is the first operand. Just expand it.
- Prod = expand(Op);
- } else if (Op->isAllOnesValue()) {
+ Prod = ExpandOpBinPowN();
+ } else if (I->second->isAllOnesValue()) {
// Instead of doing a multiply by negative one, just do a negate.
Prod = InsertNoopCastOfTo(Prod, Ty);
Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
} else {
// A simple mul.
- Value *W = expandCodeFor(Op, Ty);
+ Value *W = ExpandOpBinPowN();
Prod = InsertNoopCastOfTo(Prod, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Prod)) std::swap(Prod, W);
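
The ExpandOpBinPowN lambda is classic binary (square-and-multiply) exponentiation, specialized to count repeated SCEV operands. Stripped of the expander machinery, it reduces to this sketch:

#include <cstdint>

// Square-and-multiply: with N = P1 + ... + PK (distinct powers of two),
// x^N = x^P1 * ... * x^PK, needing O(log N) multiplies instead of N - 1.
uint64_t binPow(uint64_t X, uint64_t Exponent) {
  uint64_t P = X;      // successively X^1, X^2, X^4, ... by squaring
  uint64_t Result = 1;
  while (Exponent) {
    if (Exponent & 1)
      Result *= P;     // this power of two appears in the decomposition
    P *= P;
    Exponent >>= 1;
  }
  return Result;
}
// E.g. Exponent = 13 = 8 + 4 + 1 multiplies together X, X^4, and X^8.
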
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index e920c4c4e6b2b..cd9972ab56a68 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -58,7 +58,7 @@
//
// The struct type node has a name and a list of pairs, one pair for each member
// of the struct. The first element of each pair is a type node (a struct type
-// node or a sclar type node), specifying the type of the member, the second
+// node or a scalar type node), specifying the type of the member, the second
// element of each pair is the offset of the member.
//
// Given an example
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b065f427b06cb..fd6e3a643bf03 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -686,8 +686,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
Known.One |= RHSKnown.Zero;
// assume(v >> c = a)
} else if (match(Arg,
- m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V, m_ConstantInt(C))),
+ m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
@@ -698,9 +697,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
Known.Zero |= RHSKnown.Zero << C->getZExtValue();
Known.One |= RHSKnown.One << C->getZExtValue();
// assume(~(v >> c) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
- m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V, m_ConstantInt(C)))),
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp
index ca4d93f99d92d..f24f22c88a8aa 100644
--- a/lib/BinaryFormat/Magic.cpp
+++ b/lib/BinaryFormat/Magic.cpp
@@ -51,7 +51,8 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_import_library;
}
// Windows resource file
- if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF"))
+ if (Magic.size() >= sizeof(COFF::WinResMagic) &&
+ memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
return file_magic::windows_resource;
// 0x0000 = COFF unknown machine type
if (Magic[1] == 0)
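
The rewrite above matters because the magic contains embedded NUL bytes: a StringRef implicitly constructed from that string literal stops at the first NUL, so the old prefix test degenerates. Comparing a known-size byte array with memcmp, as the patch does via COFF::WinResMagic, sidesteps that. An illustrative sketch (the array mirrors the old 9-byte literal and is not the real, longer constant):

#include <cstring>

// sizeof() counts every byte, including the embedded NULs that a
// strlen-derived length would miss.
static const char ResMagic[] = {'\0', '\0', '\0', '\0', '\x20',
                                '\0', '\0', '\0', '\xFF'};

bool looksLikeWinRes(const char *Data, std::size_t Len) {
  return Len >= sizeof(ResMagic) &&
         std::memcmp(Data, ResMagic, sizeof(ResMagic)) == 0;
}
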
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index ad348d723bae0..c48fcaa7b0d1d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2801,26 +2801,24 @@ void AsmPrinter::emitXRayTable() {
}
// Before we switch over, we force a reference to a label inside the
- // xray_instr_map and xray_fn_idx sections. Since this function is always
- // called just before the function's end, we assume that this is happening
- // after the last return instruction. We also use the synthetic label in the
- // xray_inster_map as a delimeter for the range of sleds for this function in
- // the index.
+ // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept
+ // live by the linker if the function is not garbage-collected. Since this
+ // function is always called just before the function's end, we assume that
+ // this is happening after the last return instruction.
auto WordSizeBytes = MAI->getCodePointerSize();
- MCSymbol *SledsStart = OutContext.createTempSymbol("xray_synthetic_", true);
MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true);
OutStreamer->EmitCodeAlignment(16);
- OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false);
// Now we switch to the instrumentation map section. Because this is done
// per-function, we are able to create an index entry that will represent the
// range of sleds associated with a function.
+ MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
OutStreamer->SwitchSection(InstMap);
OutStreamer->EmitLabel(SledsStart);
for (const auto &Sled : Sleds)
Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
- MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_synthetic_end", true);
+ MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
OutStreamer->EmitLabel(SledsEnd);
// We then emit a single entry in the index per function. We use the symbols
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 30bfd7c94e68b..886e6e264b3ec 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -105,7 +105,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
}
LLVM_DUMP_METHOD
-void DIEAbbrev::print(raw_ostream &O) {
+void DIEAbbrev::print(raw_ostream &O) const {
O << "Abbreviation @"
<< format("0x%lx", (long)(intptr_t)this)
<< " "
@@ -128,7 +128,7 @@ void DIEAbbrev::print(raw_ostream &O) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DIEAbbrev::dump() {
+LLVM_DUMP_METHOD void DIEAbbrev::dump() const {
print(dbgs());
}
#endif
@@ -268,7 +268,7 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DIE::dump() {
+LLVM_DUMP_METHOD void DIE::dump() const {
print(dbgs());
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 75eb355bfb543..f1b4d9f20ca96 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -972,16 +972,62 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
return ConcreteVariables.back().get();
}
-// Determine whether this DBG_VALUE is valid at the beginning of the function.
-static bool validAtEntry(const MachineInstr *MInsn) {
- auto MBB = MInsn->getParent();
- // Is it in the entry basic block?
- if (!MBB->pred_empty())
+/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
+/// enclosing lexical scope. The check ensures there are no other instructions
+/// in the same lexical scope preceding the DBG_VALUE and that its range is
+/// either open or otherwise rolls off the end of the scope.
+static bool validThroughout(LexicalScopes &LScopes,
+ const MachineInstr *DbgValue,
+ const MachineInstr *RangeEnd) {
+ assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
+ auto MBB = DbgValue->getParent();
+ auto DL = DbgValue->getDebugLoc();
+ auto *LScope = LScopes.findLexicalScope(DL);
+ // Scope doesn't exist; this is a dead DBG_VALUE.
+ if (!LScope)
return false;
- for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I)
- if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup)))
+ auto &LSRange = LScope->getRanges();
+ if (LSRange.size() == 0)
+ return false;
+
+ // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
+ const MachineInstr *LScopeBegin = LSRange.front().first;
+ // Early exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
return false;
- return true;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
+ return false;
+ }
+
+ // If the range of the DBG_VALUE is open-ended, report success.
+ if (!RangeEnd)
+ return true;
+
+ // Fail if there are instructions belonging to our scope in another block.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (LScopeEnd->getParent() != MBB)
+ return false;
+
+ // Single, constant DBG_VALUEs in the prologue are promoted to be live
+ // throughout the function. This is a hack, presumably for DWARF v2 and not
+ // necessarily correct. It would be much better to use a dbg.declare instead
+ // if we know the constant is live throughout the scope.
+ if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
+ return true;
+
+ return false;
}
// Find variables for each lexical scope.
@@ -1016,11 +1062,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- // Check if there is a single DBG_VALUE, valid throughout the function.
- // A single constant is also considered valid for the entire function.
+ // Check if there is a single DBG_VALUE, valid throughout the var's scope.
if (Ranges.size() == 1 &&
- (MInsn->getOperand(0).isImm() ||
- (validAtEntry(MInsn) && Ranges.front().second == nullptr))) {
+ validThroughout(LScopes, MInsn, Ranges.front().second)) {
RegVar->initializeDbgValue(MInsn);
continue;
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 55a27e2fb79e5..7f3c6da912687 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_library(LLVMCodeGen
PatchableFunction.cpp
MIRPrinter.cpp
MIRPrintingPass.cpp
+ MacroFusion.cpp
OptimizePHIs.cpp
ParallelCG.cpp
PeepholeOptimizer.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 37e176099ea7a..cb31c21293f44 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1663,17 +1663,18 @@ class MemCmpExpansion {
bool IsUsedForZeroCmp;
const DataLayout &DL;
- int calculateNumBlocks(unsigned Size);
+ unsigned calculateNumBlocks(unsigned Size);
void createLoadCmpBlocks();
void createResultBlock();
void setupResultBlockPHINodes();
void setupEndBlockPHINodes();
- void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex);
+ void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
+ unsigned GEPIndex);
Value *getCompareLoadPairs(unsigned Index, unsigned Size,
unsigned &NumBytesProcessed, IRBuilder<> &Builder);
void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
unsigned &NumBytesProcessed);
- void emitLoadCompareByteBlock(unsigned Index, int GEPIndex);
+ void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase(unsigned Size);
Value *getMemCmpEqZeroOneBlock(unsigned Size);
@@ -1751,7 +1752,8 @@ void MemCmpExpansion::createResultBlock() {
// It loads 1 byte from each source of the memcmp parameters with the given
// GEPIndex. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.
-void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) {
+void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
+ unsigned GEPIndex) {
IRBuilder<> Builder(CI->getContext());
Value *Source1 = CI->getArgOperand(0);
@@ -1833,6 +1835,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(LoadSize <= MaxLoadSize && "Unexpected load type");
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -1851,18 +1854,28 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
ConstantInt::get(LoadSizeType, GEPIndex));
}
- // Load LoadSizeType from the base address.
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
- Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+ // Get a constant or load a value for each source address.
+ Value *LoadSrc1 = nullptr;
+ if (auto *Source1C = dyn_cast<Constant>(Source1))
+ LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
+ if (!LoadSrc1)
+ LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+
+ Value *LoadSrc2 = nullptr;
+ if (auto *Source2C = dyn_cast<Constant>(Source2))
+ LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
+ if (!LoadSrc2)
+ LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
if (NumLoads != 1) {
if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}
// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or.
Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
- Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
+ Diff = Builder.CreateZExt(Diff, MaxLoadType);
XorList.push_back(Diff);
} else {
// If there's only one load per block, we just compare the loaded values.
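The constant path above only replaces a load when folding actually succeeds; a minimal standalone sketch of the same guard pattern (helper name invented, assuming the usual entry point from llvm/Analysis/ConstantFolding.h):

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Fold the load when Src is a constant pointer into constant memory
    // (e.g. a memcmp against a string literal); otherwise emit a real load.
    static Value *loadOrFold(IRBuilder<> &Builder, Value *Src, Type *Ty,
                             const DataLayout &DL) {
      if (auto *C = dyn_cast<Constant>(Src))
        if (Constant *Folded = ConstantFoldLoadFromConstPtr(C, Ty, DL))
          return Folded;
      return Builder.CreateLoad(Ty, Src);
    }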
@@ -1926,8 +1939,8 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
// a special case through emitLoadCompareByteBlock. The special handling can
// simply subtract the loaded values and add it to the result phi node.
-void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
- int GEPIndex) {
+void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
+ unsigned GEPIndex) {
if (LoadSize == 1) {
MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
return;
@@ -1937,6 +1950,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(LoadSize <= MaxLoadSize && "Unexpected load type");
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -1970,8 +1984,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
}
if (LoadSizeType != MaxLoadType) {
- LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
- LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}
// Add the loaded values to the phi nodes for calculating memcmp result only
@@ -2034,8 +2048,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
PhiRes->addIncoming(Res, ResBlock.BB);
}
-int MemCmpExpansion::calculateNumBlocks(unsigned Size) {
- int NumBlocks = 0;
+unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
+ unsigned NumBlocks = 0;
bool HaveOneByteLoad = false;
unsigned RemainingSize = Size;
unsigned LoadSize = MaxLoadSize;
@@ -2104,13 +2118,13 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
// memcmp sources. It starts with loading using the maximum load size set by
// the target. It processes any remaining bytes using a load size which is the
// next smallest power of 2.
- int LoadSize = MaxLoadSize;
- int NumBytesToBeProcessed = Size;
+ unsigned LoadSize = MaxLoadSize;
+ unsigned NumBytesToBeProcessed = Size;
unsigned Index = 0;
while (NumBytesToBeProcessed) {
// Calculate how many blocks we can create with the current load size.
- int NumBlocks = NumBytesToBeProcessed / LoadSize;
- int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
+ unsigned NumBlocks = NumBytesToBeProcessed / LoadSize;
+ unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
// For each NumBlocks, populate the instruction sequence for loading and
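To see the loop above in numbers (standalone sketch, not part of the patch; Size and MaxLoadSize invented): a 15-byte memcmp with an 8-byte maximum load decomposes into one 8-, one 4-, one 2-, and one 1-byte block, with the GEP index rescaled at each load size:

    #include <cstdio>

    int main() {
      unsigned Size = 15, MaxLoadSize = 8; // assumed target maximum
      unsigned LoadSize = MaxLoadSize;
      unsigned NumBytesToBeProcessed = Size;
      while (NumBytesToBeProcessed) {
        unsigned NumBlocks = NumBytesToBeProcessed / LoadSize;
        unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
        NumBytesToBeProcessed %= LoadSize;
        if (NumBlocks)
          std::printf("%u block(s) of %u byte(s), first GEP index %u\n",
                      NumBlocks, LoadSize, GEPIndex);
        LoadSize /= 2; // next smaller power of 2
      }
      return 0;
    }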
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index dccd8e0706ca6..239bad2f53557 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -582,7 +582,7 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MIB.addUse(Zero);
}
- MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width);
+ MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});
return true;
}
@@ -686,6 +686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
+ case Intrinsic::fma:
+ MIRBuilder.buildInstr(TargetOpcode::G_FMA)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
+ return true;
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 4c0b06dffd216..5466efd7e90f4 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -25,6 +25,18 @@ using namespace llvm;
InstructionSelector::InstructionSelector() {}
+bool InstructionSelector::constrainOperandRegToRegClass(
+ MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ return llvm::constrainRegToClass(MRI, TII, RBI, I,
+ I.getOperand(OpIdx).getReg(), RC);
+}
+
bool InstructionSelector::constrainSelectedInstRegOperands(
MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) const {
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1b50489deeba9..b699156c568b4 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -50,72 +50,9 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
void Legalizer::init(MachineFunction &MF) {
}
-bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII) {
- bool Changed = false;
- if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
- return Changed;
-
- unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
- unsigned SrcReg = MI.getOperand(NumDefs).getReg();
- MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
- if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
- return Changed;
-
- unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
- bool AllDefsReplaced = true;
-
- // Try to match each register extracted with a corresponding insertion formed
- // by the G_SEQUENCE.
- for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
- MachineOperand &ExtractMO = MI.getOperand(Idx);
- assert(ExtractMO.isReg() && ExtractMO.isDef() &&
- "unexpected extract operand");
-
- unsigned ExtractReg = ExtractMO.getReg();
- unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
-
- while (SeqIdx < NumSeqSrcs &&
- SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
- ++SeqIdx;
-
- if (SeqIdx == NumSeqSrcs) {
- AllDefsReplaced = false;
- continue;
- }
-
- unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
- if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
- MRI.getType(OrigReg) != MRI.getType(ExtractReg)) {
- AllDefsReplaced = false;
- continue;
- }
-
- assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
- "unexpected physical register in G_SEQUENCE");
-
- // Finally we can replace the uses.
- MRI.replaceRegWith(ExtractReg, OrigReg);
- }
-
- if (AllDefsReplaced) {
- // If SeqI was the next instruction in the BB and we removed it, we'd break
- // the outer iteration.
- assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
- "G_SEQUENCE does not dominate G_EXTRACT");
-
- MI.eraseFromParent();
-
- if (MRI.use_empty(SrcReg))
- SeqI.eraseFromParent();
- Changed = true;
- }
-
- return Changed;
-}
-
bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII) {
+ const TargetInstrInfo &TII,
+ MachineIRBuilder &MIRBuilder) {
if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
return false;
@@ -125,18 +62,62 @@ bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
return false;
- if (MergeI.getNumOperands() - 1 != NumDefs)
- return false;
+ const unsigned NumMergeRegs = MergeI.getNumOperands() - 1;
- // FIXME: is a COPY appropriate if the types mismatch? We know both registers
- // are allocatable by now.
- if (MRI.getType(MI.getOperand(0).getReg()) !=
- MRI.getType(MergeI.getOperand(1).getReg()))
- return false;
+ if (NumMergeRegs < NumDefs) {
+ if (NumDefs % NumMergeRegs != 0)
+ return false;
+
+ MIRBuilder.setInstr(MI);
+ // Transform to UNMERGEs, for example
+ // %1 = G_MERGE_VALUES %4, %5
+ // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
+ // to
+ // %9, %10 = G_UNMERGE_VALUES %4
+ // %11, %12 = G_UNMERGE_VALUES %5
+
+ const unsigned NewNumDefs = NumDefs / NumMergeRegs;
+ for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
+ SmallVector<unsigned, 2> DstRegs;
+ for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
+ ++j, ++DefIdx)
+ DstRegs.push_back(MI.getOperand(DefIdx).getReg());
+
+ MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg());
+ }
+
+ } else if (NumMergeRegs > NumDefs) {
+ if (NumMergeRegs % NumDefs != 0)
+ return false;
+
+ MIRBuilder.setInstr(MI);
+ // Transform to MERGEs
+ // %6 = G_MERGE_VALUES %17, %18, %19, %20
+ // %7, %8 = G_UNMERGE_VALUES %6
+ // to
+ // %7 = G_MERGE_VALUES %17, %18
+ // %8 = G_MERGE_VALUES %19, %20
+
+ const unsigned NumRegs = NumMergeRegs / NumDefs;
+ for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
+ SmallVector<unsigned, 2> Regs;
+ for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx)
+ Regs.push_back(MergeI.getOperand(Idx).getReg());
+
+ MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
+ }
- for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
- MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
- MergeI.getOperand(Idx + 1).getReg());
+ } else {
+ // FIXME: is a COPY appropriate if the types mismatch? We know both
+ // registers are allocatable by now.
+ if (MRI.getType(MI.getOperand(0).getReg()) !=
+ MRI.getType(MergeI.getOperand(1).getReg()))
+ return false;
+
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
+ MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
+ MergeI.getOperand(Idx + 1).getReg());
+ }
MI.eraseFromParent();
if (MRI.use_empty(MergeI.getOperand(0).getReg()))
@@ -226,13 +207,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Get the next Instruction before we try to legalize, because there's a
// good chance MI will be deleted.
NextMI = std::next(MI);
-
- // combineExtracts erases MI.
- if (combineExtracts(*MI, MRI, TII)) {
- Changed = true;
- continue;
- }
- Changed |= combineMerges(*MI, MRI, TII);
+ Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder);
}
}
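A quick standalone check of the index arithmetic in both combineMerges branches above (register counts invented): two merge sources feeding four unmerge results give NewNumDefs = 2 results per source; four sources feeding two results give NumRegs = 2 sources per result (operand 0 of the G_MERGE_VALUES is its def, hence the + 1):

    #include <cstdio>

    int main() {
      // Fewer merge sources than unmerge results -> split the unmerge.
      unsigned NumMergeRegs = 2, NumDefs = 4;
      unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx)
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          std::printf("unmerge of source %u defines result %u\n", Idx, DefIdx);

      // More merge sources than unmerge results -> split the merge.
      NumMergeRegs = 4;
      NumDefs = 2;
      unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx)
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          std::printf("result %u merges operand %u\n", DefIdx, Idx);
      return 0;
    }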
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 4d45910422967..595802f2228b9 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -75,8 +75,7 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
- if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
- Aspect.Opcode == TargetOpcode::G_EXTRACT ||
+ if (Aspect.Opcode == TargetOpcode::G_EXTRACT ||
Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 79d312fb52ca4..3c70013ea296b 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -425,10 +425,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
.addImm(Index);
}
-MachineInstrBuilder
-MachineIRBuilder::buildSequence(unsigned Res,
- ArrayRef<unsigned> Ops,
- ArrayRef<uint64_t> Indices) {
+void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
@@ -440,13 +438,31 @@ MachineIRBuilder::buildSequence(unsigned Res,
assert(MRI->getType(Op).isValid() && "invalid operand type");
#endif
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE);
- MIB.addDef(Res);
+ LLT ResTy = MRI->getType(Res);
+ LLT OpTy = MRI->getType(Ops[0]);
+ unsigned OpSize = OpTy.getSizeInBits();
+ bool MaybeMerge = true;
for (unsigned i = 0; i < Ops.size(); ++i) {
- MIB.addUse(Ops[i]);
- MIB.addImm(Indices[i]);
+ if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {
+ MaybeMerge = false;
+ break;
+ }
+ }
+
+ if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) {
+ buildMerge(Res, Ops);
+ return;
+ }
+
+ unsigned ResIn = MRI->createGenericVirtualRegister(ResTy);
+ buildUndef(ResIn);
+
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ unsigned ResOut =
+ i + 1 == Ops.size() ? Res : MRI->createGenericVirtualRegister(ResTy);
+ buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
+ ResIn = ResOut;
}
- return MIB;
}
MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
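A hedged usage sketch of the rewritten buildSequence (virtual registers Lo and Hi invented, both s32; MRI and MIRBuilder assumed from the caller): contiguous same-typed pieces that exactly cover the result collapse into one G_MERGE_VALUES, anything else becomes a G_IMPLICIT_DEF followed by a chain of G_INSERTs:

    // Two s32 halves at bit offsets 0 and 32 exactly cover an s64:
    unsigned Res = MRI->createGenericVirtualRegister(LLT::scalar(64));
    MIRBuilder.buildSequence(Res, {Lo, Hi}, {0, 32}); // -> G_MERGE_VALUES
    // A single s32 piece at offset 8 neither covers nor tiles the s64, so
    // the fallback path emits roughly:
    //   %u(s64)   = G_IMPLICIT_DEF
    //   %Res(s64) = G_INSERT %u, %piece(s32), 8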
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 254bdf10d804f..5ecaf5c563f82 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -26,6 +26,23 @@
using namespace llvm;
+unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI,
+ MachineInstr &InsertPt, unsigned Reg,
+ const TargetRegisterClass &RegClass) {
+ if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) {
+ unsigned NewReg = MRI.createVirtualRegister(&RegClass);
+ BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewReg)
+ .addReg(Reg);
+ return NewReg;
+ }
+
+ return Reg;
+}
+
unsigned llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
@@ -36,16 +53,7 @@ unsigned llvm::constrainOperandRegClass(
"PhysReg not implemented");
const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
-
- if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) {
- unsigned NewReg = MRI.createVirtualRegister(RegClass);
- BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
- TII.get(TargetOpcode::COPY), NewReg)
- .addReg(Reg);
- return NewReg;
- }
-
- return Reg;
+ return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
}
bool llvm::isTriviallyDead(const MachineInstr &MI,
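A hedged example of calling the extracted helper from a hypothetical selector (operand index and AArch64 register class purely illustrative): when the vreg cannot be constrained in place, the helper inserts a COPY and returns the fresh register, which the caller then patches into the operand:

    unsigned Reg = I.getOperand(1).getReg();
    unsigned Fixed =
        llvm::constrainRegToClass(MRI, TII, RBI, I, Reg,
                                  AArch64::GPR32RegClass);
    if (Fixed != Reg) // a %Fixed = COPY %Reg now precedes I
      I.getOperand(1).setReg(Fixed);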
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c98c9b68ac0e4..ff8405366173e 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1474,8 +1474,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
DontKill.addLiveIns(NextMBB);
}
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
if (CvtMBB.pred_size() > 1) {
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
@@ -1484,11 +1487,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// explicitly remove CvtBBI as a successor.
BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
+ // Predicate the instructions in the true block.
RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI);
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Merge converted block into entry block.
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI);
}
@@ -1588,8 +1591,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
}
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
if (CvtMBB.pred_size() > 1) {
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
@@ -1603,7 +1609,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Now merge the entry of the triangle with the true block.
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI, false);
}
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index b831ddfa601a6..e308f49ec4e85 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -359,30 +359,15 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
Offset < PageSize))
return SR_Unsuitable;
- // Finally, we need to make sure that the access instruction actually is
- // accessing from PointerReg, and there isn't some re-definition of PointerReg
- // between the compare and the memory access.
- // If PointerReg has been redefined before then there is no sense to continue
- // lookup due to this condition will fail for any further instruction.
- SuitabilityResult Suitable = SR_Suitable;
- for (auto *PrevMI : PrevInsts)
- for (auto &PrevMO : PrevMI->operands()) {
- if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() &&
- TRI->regsOverlap(PrevMO.getReg(), PointerReg))
- return SR_Impossible;
-
- // Check whether the current memory access aliases with previous one.
- // If we already found that it aliases then no need to continue.
- // But we continue base pointer check as it can result in SR_Impossible.
- if (Suitable == SR_Suitable) {
- AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
- if (AR == AR_WillAliasEverything)
- return SR_Impossible;
- if (AR == AR_MayAlias)
- Suitable = SR_Unsuitable;
- }
- }
- return Suitable;
+ // Finally, check whether the current memory access aliases with any previous one.
+ for (auto *PrevMI : PrevInsts) {
+ AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
+ if (AR == AR_WillAliasEverything)
+ return SR_Impossible;
+ if (AR == AR_MayAlias)
+ return SR_Unsuitable;
+ }
+ return SR_Suitable;
}
bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
@@ -569,6 +554,12 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return true;
}
+ // If MI re-defines the PointerReg then we cannot move further.
+ if (any_of(MI.operands(), [&](MachineOperand &MO) {
+ return MO.isReg() && MO.getReg() && MO.isDef() &&
+ TRI->regsOverlap(MO.getReg(), PointerReg);
+ }))
+ return false;
InstsSeenSoFar.push_back(&MI);
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index bbd783367c9e8..0c76478af551f 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -1006,7 +1006,7 @@ bool LiveDebugVariables::doInitialization(Module &M) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
+LLVM_DUMP_METHOD void LiveDebugVariables::dump() const {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->print(dbgs());
}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index afe87a52544d8..1d7e3d4371a24 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -59,7 +59,7 @@ public:
void emitDebugValues(VirtRegMap *VRM);
/// dump - Print data structures to dbgs().
- void dump();
+ void dump() const;
private:
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 590acc01008a6..81597afe6b02b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -228,6 +228,12 @@ LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
}
#endif
+bool MachineBasicBlock::isLegalToHoistInto() const {
+ if (isReturnBlock() || hasEHPadSuccessor())
+ return false;
+ return true;
+}
+
StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 2a6cb07dbd2da..81c6dace92e04 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const {
return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
}
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const {
+ if (!V.is<const Value*>())
+ return false;
+
+ const Value *BasePtr = V.get<const Value*>();
+ if (BasePtr == nullptr)
+ return false;
+
+ return isDereferenceableAndAlignedPointer(BasePtr, 1,
+ APInt(DL.getPointerSize(),
+ Offset + Size),
+ DL);
+}
+
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
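A hedged usage sketch of the new query (MF and MPI assumed from the surrounding pass): asking whether the first four bytes behind a MachinePointerInfo are known dereferenceable, e.g. before speculating a load:

    const DataLayout &DL = MF.getDataLayout();
    if (MPI.isDereferenceable(/*Size=*/4, MF.getFunction()->getContext(),
                              DL)) {
      // Safe to emit the 4-byte load unconditionally.
    }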
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 4c81fd91cb829..22d519e5d88fa 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -23,7 +23,6 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
-void MachineModuleInfoWasm::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 01a2286b8d66a..eaba9a58557c3 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -542,10 +542,10 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void ReadyQueue::dump() {
+LLVM_DUMP_METHOD void ReadyQueue::dump() const {
dbgs() << "Queue " << Name << ": ";
- for (unsigned i = 0, e = Queue.size(); i < e; ++i)
- dbgs() << Queue[i]->NodeNum << " ";
+ for (const SUnit *SU : Queue)
+ dbgs() << SU->NodeNum << " ";
dbgs() << "\n";
}
#endif
@@ -609,10 +609,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- releaseSucc(SU, &*I);
- }
+ for (SDep &Succ : SU->Succs)
+ releaseSucc(SU, &Succ);
}
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
@@ -648,10 +646,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- releasePred(SU, &*I);
- }
+ for (SDep &Pred : SU->Preds)
+ releasePred(SU, &Pred);
}
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
@@ -724,8 +720,8 @@ void ScheduleDAGMI::schedule() {
DEBUG(
if (EntrySU.getInstr() != nullptr)
EntrySU.dumpAll(this);
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this);
+ for (const SUnit &SU : SUnits)
+ SU.dumpAll(this);
if (ExitSU.getInstr() != nullptr)
ExitSU.dumpAll(this);
);
@@ -786,28 +782,25 @@ void ScheduleDAGMI::schedule() {
/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
- for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
- Mutations[i]->apply(this);
- }
+ for (auto &m : Mutations)
+ m->apply(this);
}
void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
SmallVectorImpl<SUnit*> &BotRoots) {
- for (std::vector<SUnit>::iterator
- I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
- SUnit *SU = &(*I);
- assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+ for (SUnit &SU : SUnits) {
+ assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
// Order predecessors so DFSResult follows the critical path.
- SU->biasCriticalPath();
+ SU.biasCriticalPath();
// A SUnit is ready to top schedule if it has no predecessors.
- if (!I->NumPredsLeft)
- TopRoots.push_back(SU);
+ if (!SU.NumPredsLeft)
+ TopRoots.push_back(&SU);
// A SUnit is ready to bottom schedule if it has no successors.
- if (!I->NumSuccsLeft)
- BotRoots.push_back(SU);
+ if (!SU.NumSuccsLeft)
+ BotRoots.push_back(&SU);
}
ExitSU.biasCriticalPath();
}
@@ -822,10 +815,9 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
//
// Nodes with unreleased weak edges can still be roots.
// Release top roots in forward order.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
- SchedImpl->releaseTopNode(*I);
- }
+ for (SUnit *SU : TopRoots)
+ SchedImpl->releaseTopNode(SU);
+
// Release bottom roots in reverse order so the higher priority nodes appear
// first. This is more natural and slightly more efficient.
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
@@ -1029,9 +1021,9 @@ void ScheduleDAGMILive::initRegPressure() {
}
}
DEBUG(dbgs() << "Excess PSets: ";
- for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ for (const PressureChange &RCPS : RegionCriticalPSets)
dbgs() << TRI->getRegPressureSetName(
- RegionCriticalPSets[i].getPSet()) << " ";
+ RCPS.getPSet()) << " ";
dbgs() << "\n");
}
@@ -1040,11 +1032,10 @@ updateScheduledPressure(const SUnit *SU,
const std::vector<unsigned> &NewMaxPressure) {
const PressureDiff &PDiff = getPressureDiff(SU);
unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
- for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
- I != E; ++I) {
- if (!I->isValid())
+ for (const PressureChange &PC : PDiff) {
+ if (!PC.isValid())
break;
- unsigned ID = I->getPSet();
+ unsigned ID = PC.getPSet();
while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
++CritIdx;
if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
@@ -1508,8 +1499,7 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
- for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
- SUnit *SU = MemOps[Idx];
+ for (SUnit *SU : MemOps) {
unsigned BaseReg;
int64_t Offset;
if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
@@ -1537,12 +1527,11 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
// dependent on SUa can prevent load combining due to register reuse.
// Predecessor edges do not need to be copied from SUb to SUa since nearby
// loads should have effectively the same inputs.
- for (SUnit::const_succ_iterator
- SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
- if (SI->getSUnit() == SUb)
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
continue;
- DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
- DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
} else
@@ -1559,17 +1548,15 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
DenseMap<unsigned, unsigned> StoreChainIDs;
// Map each store chain to a set of dependent MemOps.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
- for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
- SUnit *SU = &DAG->SUnits[Idx];
- if ((IsLoad && !SU->getInstr()->mayLoad()) ||
- (!IsLoad && !SU->getInstr()->mayStore()))
+ for (SUnit &SU : DAG->SUnits) {
+ if ((IsLoad && !SU.getInstr()->mayLoad()) ||
+ (!IsLoad && !SU.getInstr()->mayStore()))
continue;
unsigned ChainPredID = DAG->SUnits.size();
- for (SUnit::const_pred_iterator
- PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
- if (PI->isCtrl()) {
- ChainPredID = PI->getSUnit()->NodeNum;
+ for (const SDep &Pred : SU.Preds) {
+ if (Pred.isCtrl()) {
+ ChainPredID = Pred.getSUnit()->NodeNum;
break;
}
}
@@ -1580,12 +1567,12 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
if (Result.second)
StoreChainDependents.resize(NumChains + 1);
- StoreChainDependents[Result.first->second].push_back(SU);
+ StoreChainDependents[Result.first->second].push_back(&SU);
}
// Iterate over the store chains.
- for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
- clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
+ for (auto &SCD : StoreChainDependents)
+ clusterNeighboringMemOps(SCD, DAG);
}
//===----------------------------------------------------------------------===//
@@ -1728,16 +1715,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
- for (SUnit::const_succ_iterator
- I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
- I != E; ++I) {
- if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+ for (const SDep &Succ : LastLocalSU->Succs) {
+ if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
continue;
- if (I->getSUnit() == GlobalSU)
+ if (Succ.getSUnit() == GlobalSU)
continue;
- if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+ if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
return;
- LocalUses.push_back(I->getSUnit());
+ LocalUses.push_back(Succ.getSUnit());
}
// Open the top of the GlobalLI hole by constraining any earlier global uses
// to precede the start of LocalLI.
@@ -1745,15 +1730,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
MachineInstr *FirstLocalDef =
LIS->getInstructionFromIndex(LocalLI->beginIndex());
SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
- for (SUnit::const_pred_iterator
- I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
- if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+ for (const SDep &Pred : GlobalSU->Preds) {
+ if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
continue;
- if (I->getSUnit() == FirstLocalSU)
+ if (Pred.getSUnit() == FirstLocalSU)
continue;
- if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+ if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
return;
- GlobalUses.push_back(I->getSUnit());
+ GlobalUses.push_back(Pred.getSUnit());
}
DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
// Add the weak edges.
@@ -1784,12 +1768,11 @@ void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
RegionEndIdx = DAG->getLIS()->getInstructionIndex(
*priorNonDebug(DAG->end(), DAG->begin()));
- for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
- SUnit *SU = &DAG->SUnits[Idx];
- if (!SU->getInstr()->isCopy())
+ for (SUnit &SU : DAG->SUnits) {
+ if (!SU.getInstr()->isCopy())
continue;
- constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
+ constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
}
}
@@ -1840,10 +1823,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
if (!SchedModel->hasInstrSchedModel())
return;
RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
- for (std::vector<SUnit>::iterator
- I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
- const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
- RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
+ for (SUnit &SU : DAG->SUnits) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
+ RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
* SchedModel->getMicroOpFactor();
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
@@ -1957,12 +1939,11 @@ unsigned SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
SUnit *LateSU = nullptr;
unsigned RemLatency = 0;
- for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
- I != E; ++I) {
- unsigned L = getUnscheduledLatency(*I);
+ for (SUnit *SU : ReadySUs) {
+ unsigned L = getUnscheduledLatency(SU);
if (L > RemLatency) {
RemLatency = L;
- LateSU = *I;
+ LateSU = SU;
}
}
if (LateSU) {
@@ -2328,7 +2309,7 @@ SUnit *SchedBoundary::pickOnlyChoice() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
-LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() {
+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
unsigned ResFactor;
unsigned ResCount;
if (ZoneCritResIdx) {
@@ -2667,7 +2648,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
}
}
-void GenericScheduler::dumpPolicy() {
+void GenericScheduler::dumpPolicy() const {
// Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "GenericScheduler RegionPolicy: "
@@ -2719,10 +2700,9 @@ void GenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
- for (std::vector<SUnit*>::const_iterator
- I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
- if ((*I)->getDepth() > Rem.CriticalPath)
- Rem.CriticalPath = (*I)->getDepth();
+ for (const SUnit *SU : Bot.Available) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
}
DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
@@ -2969,10 +2949,10 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
ReadyQueue &Q = Zone.Available;
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
- initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryCandidate(Cand, TryCand, ZoneArg);
@@ -3118,18 +3098,17 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
// Find already scheduled copies with a single physreg dependence and move
// them just above the scheduled instruction.
- for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
- I != E; ++I) {
- if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+ for (SDep &Dep : Deps) {
+ if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
continue;
- SUnit *DepSU = I->getSUnit();
+ SUnit *DepSU = Dep.getSUnit();
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
continue;
MachineInstr *Copy = DepSU->getInstr();
if (!Copy->isCopy())
continue;
DEBUG(dbgs() << " Rescheduling physreg copy ";
- I->getSUnit()->dump(DAG));
+ Dep.getSUnit()->dump(DAG));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3204,10 +3183,9 @@ void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
- if ((*I)->getDepth() > Rem.CriticalPath)
- Rem.CriticalPath = (*I)->getDepth();
+ for (const SUnit *SU : BotRoots) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
}
DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
@@ -3260,9 +3238,9 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
ReadyQueue &Q = Top.Available;
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ for (SUnit *SU : Q) {
SchedCandidate TryCand(Cand.Policy);
- TryCand.SU = *I;
+ TryCand.SU = SU;
TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
tryCandidate(Cand, TryCand);
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
new file mode 100644
index 0000000000000..45ea0e4c39ab4
--- /dev/null
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -0,0 +1,150 @@
+//===- MacroFusion.cpp - Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the implementation of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+namespace {
+
+static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+ SUnit &SecondSU) {
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+
+ // Adjust the latency between SecondSU and its predecessors.
+ for (SDep &IDep : SecondSU.Preds)
+ if (IDep.getSUnit() == &FirstSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between FirstSU and its successors.
+ for (SDep &IDep : FirstSU.Succs)
+ if (IDep.getSUnit() == &SecondSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
+ DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; );
+
+ if (&SecondSU != &DAG.ExitSU)
+ // Make instructions dependent on FirstSU also dependent on SecondSU to
+ // prevent them from being scheduled between FirstSU and SecondSU.
+ for (const SDep &SI : FirstSU.Succs) {
+ if (SI.getSUnit() == &SecondSU)
+ continue;
+ DEBUG(dbgs() << " Copy Succ ";
+ SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';);
+ DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial));
+ }
+
+ ++NumFused;
+}
+
+
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ ShouldSchedulePredTy shouldScheduleAdjacent;
+ bool FuseBlock;
+ bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU);
+
+public:
+ MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
+ : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ if (FuseBlock)
+ // For each of the SUnits in the scheduling block, try to fuse the instr in
+ // it with one in its predecessors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(*DAG, ISU);
+
+ if (DAG->ExitSU.getInstr())
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(*DAG, DAG->ExitSU);
+}
+
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
+ const MachineInstr &AnchorMI = *AnchorSU.getInstr();
+ const TargetInstrInfo &TII = *DAG.TII;
+ const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI))
+ return false;
+
+ // Explore the dependencies of the anchor instr for fusion candidates.
+ for (SDep &Dep : AnchorSU.Preds) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output ||
+ Dep.isWeak())
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ if (DepSU.isBoundaryNode())
+ continue;
+
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ continue;
+
+ fuseInstructionPair(DAG, DepSU, AnchorSU);
+ return true;
+ }
+
+ return false;
+}
+
+} // end anonymous namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+ return nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+ return nullptr;
+}
+
+} // end namespace llvm
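A hedged sketch of how a target wires this up (predicate body invented; the signature matches ShouldSchedulePredTy as called above, where a null FirstMI asks whether SecondMI could fuse with any predecessor at all):

    static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &STI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
      if (!FirstMI)
        return SecondMI.isBranch();
      // Illustrative rule: fuse a compare feeding a conditional branch.
      return FirstMI->isCompare() && SecondMI.isConditionalBranch();
    }

    // When constructing the scheduler:
    //   DAG->addMutation(createMacroFusionDAGMutation(shouldScheduleAdjacent));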
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 1aed58c36e17d..05e641d9489d9 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -35,6 +35,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
@@ -260,6 +261,14 @@ void RegScavenger::backward() {
const MachineInstr &MI = *MBBI;
LiveUnits.stepBackward(MI);
+ // Expire scavenge spill frameindex uses.
+ for (ScavengedInfo &I : Scavenged) {
+ if (I.Restore == &MI) {
+ I.Reg = 0;
+ I.Restore = nullptr;
+ }
+ }
+
if (MBBI == MBB->begin()) {
MBBI = MachineBasicBlock::iterator(nullptr);
Tracking = false;
@@ -356,6 +365,80 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
return Survivor;
}
+/// Search backwards from position \p From for a register that is part of
+/// \p AllocationOrder and is not used/clobbered until the point \p To. If
+/// there are multiple candidates, continue searching and pick the one that
+/// stays unused/unclobbered for the longest time.
+/// Returns the register and the earliest position we know it to be free, or
+/// the position MBB.end() if no register is available.
+static std::pair<MCPhysReg, MachineBasicBlock::iterator>
+findSurvivorBackwards(const MachineRegisterInfo &MRI,
+ MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
+ const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) {
+ bool FoundTo = false;
+ MCPhysReg Survivor = 0;
+ MachineBasicBlock::iterator Pos;
+ MachineBasicBlock &MBB = *From->getParent();
+ unsigned InstrLimit = 25;
+ unsigned InstrCountDown = InstrLimit;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LiveRegUnits Used(TRI);
+
+ for (MachineBasicBlock::iterator I = From;; --I) {
+ const MachineInstr &MI = *I;
+
+ Used.accumulateBackward(MI);
+
+ if (I == To) {
+ // See if one of the registers in AllocationOrder wasn't used so far.
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg) &&
+ LiveOut.available(Reg))
+ return std::make_pair(Reg, MBB.end());
+ }
+ // Otherwise we will continue up to InstrLimit instructions to find
+ // the register which is not defined/used for the longest time.
+ FoundTo = true;
+ Pos = To;
+ }
+ if (FoundTo) {
+ if (Survivor == 0 || !Used.available(Survivor)) {
+ MCPhysReg AvailableReg = 0;
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg)) {
+ AvailableReg = Reg;
+ break;
+ }
+ }
+ if (AvailableReg == 0)
+ break;
+ Survivor = AvailableReg;
+ }
+ if (--InstrCountDown == 0)
+ break;
+
+ // Keep searching when we find a vreg since the spilled register will
+ // be useful for that vreg as well later.
+ bool FoundVReg = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ FoundVReg = true;
+ break;
+ }
+ }
+ if (FoundVReg) {
+ InstrCountDown = InstrLimit;
+ Pos = I;
+ }
+ if (I == MBB.begin())
+ break;
+ }
+ }
+
+ return std::make_pair(Survivor, Pos);
+}
+
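A hand-worked trace of the search (registers and window contents invented):

    // Walking backwards with AllocationOrder = {r0, r1, r2}:
    //   From: ... = use r0          Used = {r0}
    //    ...: r1  = def ...         Used = {r0, r1}
    //   To:   reached; r2 is unused in [To, From] and free in LiveOut
    //         -> return {r2, MBB.end()}: no spill needed.
    // If every candidate were used in that window, the loop would keep
    // walking to earlier instructions (bounded by the 25-instruction
    // countdown, which resets at each vreg use), re-picking Survivor
    // whenever the current choice is clobbered, and finally return the
    // last position Pos at which Survivor is known to be free, signalling
    // that a spill/restore is required.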
static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -365,44 +448,16 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
return i;
}
-unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I,
- int SPAdj) {
- MachineInstr &MI = *I;
- const MachineFunction &MF = *MI.getParent()->getParent();
- // Consider all allocatable registers in the register class initially
- BitVector Candidates = TRI->getAllocatableSet(MF, RC);
-
- // Exclude all the registers being used by the instruction.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
-
- // Try to find a register that's unused if there is one, as then we won't
- // have to spill.
- BitVector Available = getRegsAvailable(RC);
- Available &= Candidates;
- if (Available.any())
- Candidates = Available;
-
- // Find the register whose use is furthest away.
- MachineBasicBlock::iterator UseMI;
- unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
-
- // If we found an unused register there is no reason to spill it.
- if (!isRegUsed(SReg)) {
- DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
- return SReg;
- }
-
+RegScavenger::ScavengedInfo &
+RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+ MachineBasicBlock::iterator Before,
+ MachineBasicBlock::iterator &UseMI) {
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
+ const MachineFunction &MF = *Before->getParent()->getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned NeedSize = TRI->getSpillSize(*RC);
- unsigned NeedAlign = TRI->getSpillAlignment(*RC);
+ unsigned NeedSize = TRI->getSpillSize(RC);
+ unsigned NeedAlign = TRI->getSpillAlignment(RC);
unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
@@ -437,39 +492,72 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
}
// Avoid infinite regress
- Scavenged[SI].Reg = SReg;
+ Scavenged[SI].Reg = Reg;
// If the target knows how to save/restore the register, let it do so;
// otherwise, use the emergency stack spill slot.
- if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
- // Spill the scavenged register before I.
+ if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) {
+ // Spill the scavenged register before \p Before.
int FI = Scavenged[SI].FrameIndex;
if (FI < FIB || FI >= FIE) {
std::string Msg = std::string("Error while trying to spill ") +
- TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) +
+ TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) +
": Cannot scavenge register without an emergency spill slot!";
report_fatal_error(Msg.c_str());
}
- TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
- RC, TRI);
- MachineBasicBlock::iterator II = std::prev(I);
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex,
+ &RC, TRI);
+ MachineBasicBlock::iterator II = std::prev(Before);
unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
- RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex,
+ &RC, TRI);
II = std::prev(UseMI);
FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
+ return Scavenged[SI];
+}
- Scavenged[SI].Restore = &*std::prev(UseMI);
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ MachineInstr &MI = *I;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates = TRI->getAllocatableSet(MF, RC);
- // Doing this here leads to infinite regress.
- // Scavenged[SI].Reg = SReg;
+ // Exclude all the registers being used by the instruction.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill.
+ BitVector Available = getRegsAvailable(RC);
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isRegUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
+ Scavenged.Restore = &*std::prev(UseMI);
DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
"\n");
@@ -477,85 +565,195 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
-void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
- // FIXME: Iterating over the instruction stream is unnecessary. We can simply
- // iterate over the vreg use list, which at this point only contains machine
- // operands for which eliminateFrameIndex need a new scratch reg.
+unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
+ MachineBasicBlock::iterator To,
+ bool RestoreAfter, int SPAdj) {
+ const MachineBasicBlock &MBB = *To->getParent();
+ const MachineFunction &MF = *MBB.getParent();
- // Run through the instructions and find any virtual registers.
- MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF) {
- RS.enterBasicBlock(MBB);
-
- int SPAdj = 0;
-
- // The instruction stream may change in the loop, so check MBB.end()
- // directly.
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- // We might end up here again with a NULL iterator if we scavenged a
- // register for which we inserted spill code for definition by what was
- // originally the first instruction in MBB.
- if (I == MachineBasicBlock::iterator(nullptr))
- I = MBB.begin();
-
- const MachineInstr &MI = *I;
- MachineBasicBlock::iterator J = std::next(I);
- MachineBasicBlock::iterator P =
- I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
- : std::prev(I);
-
- // RS should process this instruction before we might scavenge at this
- // location. This is because we might be replacing a virtual register
- // defined by this instruction, and if so, registers killed by this
- // instruction are available, and defined registers are not.
- RS.forward(I);
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
+ std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+ findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder);
+ MCPhysReg Reg = P.first;
+ MachineBasicBlock::iterator SpillBefore = P.second;
+ assert(Reg != 0 && "No register left to scavenge!");
+ // Found an available register?
+ if (SpillBefore != MBB.end()) {
+ MachineBasicBlock::iterator ReloadAfter =
+ RestoreAfter ? std::next(MBBI) : MBBI;
+ MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+ Scavenged.Restore = &*std::prev(SpillBefore);
+ LiveUnits.removeReg(Reg);
+ DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI)
+ << " until " << *SpillBefore);
+ } else {
+ DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n');
+ }
+ return Reg;
+}
- for (const MachineOperand &MO : MI.operands()) {
+/// Allocate a register for the virtual register \p VReg. The last use of
+/// \p VReg is around the current position of the register scavenger \p RS.
+/// \p ReserveAfter controls whether the scavenged register needs to be reserved
+/// after the current instruction, otherwise it will only be reserved before the
+/// current instruction.
+static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
+ unsigned VReg, bool ReserveAfter) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+#ifndef NDEBUG
+ // Verify that all definitions and uses are in the same basic block.
+ const MachineBasicBlock *CommonMBB = nullptr;
+ // Real definition for the reg, re-definitions are not considered.
+ const MachineInstr *RealDef = nullptr;
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) {
+ MachineBasicBlock *MBB = MO.getParent()->getParent();
+ if (CommonMBB == nullptr)
+ CommonMBB = MBB;
+ assert(MBB == CommonMBB && "All defs+uses must be in the same basic block");
+ if (MO.isDef()) {
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.readsRegister(VReg, &TRI)) {
+ assert((!RealDef || RealDef == &MI) &&
+ "Can have at most one definition which is not a redefinition");
+ RealDef = &MI;
+ }
+ }
+ }
+ assert(RealDef != nullptr && "Must have at least 1 Def");
+#endif
+
+  // We should only have one definition of the register. However, to
+  // accommodate the requirements of two-address code we also allow
+  // definitions in subsequent instructions provided they also read the
+  // register. That way we get a single contiguous lifetime.
+ //
+  // Definitions in MRI.def_begin() are unordered; search for the first.
+ MachineRegisterInfo::def_iterator FirstDef =
+ std::find_if(MRI.def_begin(VReg), MRI.def_end(),
+ [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
+ assert(FirstDef != MRI.def_end() &&
+ "Must have one definition that does not redefine vreg");
+ MachineInstr &DefMI = *FirstDef->getParent();
+
+  // The register scavenger will report a free register, inserting an emergency
+ // spill/reload if necessary.
+ int SPAdj = 0;
+ const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
+ unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ ReserveAfter, SPAdj);
+ MRI.replaceRegWith(VReg, SReg);
+ ++NumScavengedRegs;
+ return SReg;
+}
+
+/// Allocate (scavenge) vregs inside a single basic block.
+/// Returns true if the target spill callback created new vregs and a 2nd pass
+/// is necessary.
+static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
+ RegScavenger &RS,
+ MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ RS.enterBasicBlockEnd(MBB);
+
+ unsigned InitialNumVirtRegs = MRI.getNumVirtRegs();
+ bool NextInstructionReadsVReg = false;
+ for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
+ --I;
+ // Move RegScavenger to the position between *I and *std::next(I).
+ RS.backward(I);
+
+ // Look for unassigned vregs in the uses of *std::next(I).
+ if (NextInstructionReadsVReg) {
+ MachineBasicBlock::iterator N = std::next(I);
+ const MachineInstr &NMI = *N;
+ for (const MachineOperand &MO : NMI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ // We only care about virtual registers and ignore virtual registers
+ // created by the target callbacks in the process (those will be handled
+        // in a second scavenging pass).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ if (!MO.readsReg())
continue;
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MO.isDef() && "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- unsigned ScratchReg = RS.scavengeRegister(RC, J, SPAdj);
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, true);
+ N->addRegisterKilled(SReg, &TRI, false);
+ RS.setRegUsed(SReg);
+ }
+ }
- ++NumScavengedRegs;
+ // Look for unassigned vregs in the defs of *I.
+ NextInstructionReadsVReg = false;
+ const MachineInstr &MI = *I;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Only vregs, no newly created vregs (see above).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ // We have to look at all operands anyway so we can precalculate here
+      // whether there is a reading operand. This allows us to skip the use
+ // step in the next iteration if there was none.
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ if (MO.readsReg()) {
+ NextInstructionReadsVReg = true;
+ }
+ if (MO.isDef()) {
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, false);
+ I->addRegisterDead(SReg, &TRI, false);
+ }
+ }
+ }
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MBB.front().operands()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ assert(!MO.readsReg() && "Vreg use in first instruction not allowed");
+ }
+#endif
- // Replace this reference to the virtual register with the
- // scratch register.
- assert(ScratchReg && "Missing scratch register!");
- MRI.replaceRegWith(Reg, ScratchReg);
+ return MRI.getNumVirtRegs() != InitialNumVirtRegs;
+}
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS.setRegUsed(ScratchReg);
- }
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+  // operands for which eliminateFrameIndex needs a new scratch reg.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Shortcut.
+ if (MRI.getNumVirtRegs() == 0) {
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ return;
+ }
+
+ // Run through the instructions and find any virtual registers.
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
- // If the scavenger needed to use one of its spill slots, the
- // spill code will have been inserted in between I and J. This is a
- // problem because we need the spill code before I: Move I to just
- // prior to J.
- if (I != std::prev(J)) {
- MBB.splice(J, &MBB, I);
-
- // Before we move I, we need to prepare the RS to visit I again.
- // Specifically, RS will assert if it sees uses of registers that
- // it believes are undefined. Because we have already processed
- // register kills in I, when it visits I again, it will believe that
- // those registers are undefined. To avoid this situation, unprocess
- // the instruction I.
- assert(RS.getCurrentPosition() == I &&
- "The register scavenger has an unexpected position");
- I = P;
- RS.unprocess(P);
- } else
- ++I;
+ bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ if (Again) {
+ DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+ << MBB.getName() << '\n');
+ Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ // The target required a 2nd run (because it created new vregs while
+      // spilling). Refuse to do another pass to keep compile time in check.
+ if (Again)
+ report_fatal_error("Incomplete scavenging after 2nd pass");
}
}
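
The rewritten pass allocates every frame-index vreg during one bottom-up walk
per block, and tolerates exactly one retry when the target's spill hooks
introduce fresh vregs. A standalone toy model of that control flow (Inst,
Block, and allocatePhysReg are invented stand-ins, not LLVM APIs, the
def/use visit order is simplified, and the real driver also re-snapshots the
vreg count before the second pass):

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    struct Inst { std::vector<int> uses, defs; };
    struct Block { std::vector<Inst> insts; };

    // Stand-in for scavengeVReg(): picks a physical register for a vreg at
    // position i; a real implementation may insert spill code and thereby
    // create new vregs.
    static int allocatePhysReg(Block &, std::size_t, int, bool &createdVRegs) {
      createdVRegs = false; // toy stub: never needs to spill
      return 0;
    }

    // One backward pass over the block, skipping vregs created mid-pass.
    static bool scavengeBlockOnce(Block &B, int numVRegsBefore) {
      bool createdVRegs = false;
      for (std::size_t i = B.insts.size(); i-- > 0;) {
        for (int v : B.insts[i].defs)
          if (v < numVRegsBefore)
            allocatePhysReg(B, i, v, createdVRegs);
        for (int v : B.insts[i].uses)
          if (v < numVRegsBefore)
            allocatePhysReg(B, i, v, createdVRegs);
      }
      return createdVRegs; // true => another pass is required
    }

    static void scavengeBlock(Block &B, int numVRegs) {
      if (scavengeBlockOnce(B, numVRegs) &&  // spill hooks made new vregs
          scavengeBlockOnce(B, numVRegs))    // second pass did too: give up
        throw std::runtime_error("incomplete scavenging after 2nd pass");
    }
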
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index d7a3ac0808230..30757f070cadb 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -1,4 +1,4 @@
-//===- RegisterUsageInfo.cpp - Register Usage Informartion Storage --------===//
+//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -38,7 +38,7 @@ static cl::opt<bool> DumpRegUsage(
cl::desc("print register usage details collected for analysis."));
INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
- "Register Usage Informartion Stroage", false, true)
+ "Register Usage Information Storage", false, true)
char PhysicalRegisterUsageInfo::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index a668ddb7389f1..ae9c5adb03979 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_library(LLVMSelectionDAG
ScheduleDAGVLIW.cpp
SelectionDAGBuilder.cpp
SelectionDAG.cpp
+ SelectionDAGAddressAnalysis.cpp
SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d4422d94a172..d02dcb6f4439b 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -469,7 +470,8 @@ namespace {
/// \return True if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ bool IsConstantSrc, bool UseVector,
+ bool UseTrunc);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
@@ -2549,14 +2551,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1 == 0)
+ if (N1IsConst && ConstValue1.isNullValue())
return N1;
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
+ if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -3685,7 +3687,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
- if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -4694,110 +4696,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
}
namespace {
-/// Helper struct to parse and store a memory address as base + index + offset.
-/// We ignore sign extensions when it is safe to do so.
-/// The following two expressions are not equivalent. To differentiate we need
-/// to store whether there was a sign extension involved in the index
-/// computation.
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (add (i8 load %index)
-/// (i8 1))))
-/// vs
-///
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
-/// (i32 1)))))
-struct BaseIndexOffset {
- SDValue Base;
- SDValue Index;
- int64_t Offset;
- bool IsIndexSignExt;
-
- BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
-
- BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
- bool IsIndexSignExt) :
- Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
-
- bool equalBaseIndex(const BaseIndexOffset &Other) {
- return Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt;
- }
-
- /// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
- int64_t PartialOffset = 0) {
- bool IsIndexSignExt = false;
-
- // Split up a folded GlobalAddress+Offset into its component parts.
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
- if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
- return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
- SDLoc(GA),
- GA->getValueType(0),
- /*Offset=*/PartialOffset,
- /*isTargetGA=*/false,
- GA->getTargetFlags()),
- SDValue(),
- GA->getOffset(),
- IsIndexSignExt);
- }
-
- // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
- // instruction, then it could be just the BASE or everything else we don't
- // know how to handle. Just use Ptr as BASE and give up.
- if (Ptr->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // We know that we have at least an ADD instruction. Try to pattern match
- // the simple case of BASE + OFFSET.
- if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
- }
-
- // Inside a loop the current BASE pointer is calculated using an ADD and a
- // MUL instruction. In this case Ptr is the actual BASE pointer.
- // (i64 add (i64 %array_ptr)
- // (i64 mul (i64 %induction_var)
- // (i64 %element_size)))
- if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // Look at Base + Index + Offset cases.
- SDValue Base = Ptr->getOperand(0);
- SDValue IndexOffset = Ptr->getOperand(1);
-
- // Skip signextends.
- if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
- IndexOffset = IndexOffset->getOperand(0);
- IsIndexSignExt = true;
- }
-
- // Either the case of Base + Index (no offset) or something else.
- if (IndexOffset->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
-
- // Now we have the case of Base + Index + offset.
- SDValue Index = IndexOffset->getOperand(0);
- SDValue Offset = IndexOffset->getOperand(1);
-
- if (!isa<ConstantSDNode>(Offset))
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // Ignore signextends.
- if (Index->getOpcode() == ISD::SIGN_EXTEND) {
- Index = Index->getOperand(0);
- IsIndexSignExt = true;
- } else IsIndexSignExt = false;
-
- int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
- return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
- }
-};
-} // namespace
-
-namespace {
/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
@@ -5017,14 +4915,15 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr());
+ int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
- else if (!Base->equalBaseIndex(Ptr))
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
// Calculate the offset of the current byte from the base address
- int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
+ ByteOffsetFromBase += MemoryByteOffset(*P);
ByteOffsets[i] = ByteOffsetFromBase;
// Remember the first byte load
@@ -12378,8 +12277,8 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
- SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
- unsigned NumStores, bool IsConstantSrc, bool UseVector) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector, bool UseTrunc) {
// Make sure we have something to merge.
if (NumStores < 2)
return false;
@@ -12464,7 +12363,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
- if (TLI.isTypeLegal(StoredVal.getValueType())) {
+ if (UseVector || !UseTrunc) {
NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
@@ -12495,15 +12394,15 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
- if (!BasePtr.Base.getNode())
+ if (!BasePtr.getBase().getNode())
return;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.isUndef())
+ if (BasePtr.getBase().isUndef())
return;
bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
@@ -12515,10 +12414,11 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
if (IsLoadSrc)
- LBasePtr = BaseIndexOffset::match(
- cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
+ LBasePtr =
+ BaseIndexOffset::match(cast<LoadSDNode>(St->getValue())->getBasePtr());
- auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
+ int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
// We can merge constant floats to equivalent integers
@@ -12529,8 +12429,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsLoadSrc) {
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
- if (!(LBasePtr.equalBaseIndex(LPtr)))
+ auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr());
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
return false;
@@ -12543,8 +12443,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
return false;
- Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
- return (Ptr.equalBaseIndex(BasePtr));
+ Ptr = BaseIndexOffset::match(Other->getBasePtr());
+ return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
// We're looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
@@ -12572,16 +12472,18 @@ void DAGCombiner::getStoreMergeCandidates(
if (I2.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
BaseIndexOffset Ptr;
- if (CandidateMatch(OtherST, Ptr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
if (I.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
- if (CandidateMatch(OtherST, Ptr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
}
@@ -12721,8 +12623,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned LastLegalType = 0;
- unsigned LastLegalVectorType = 0;
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
bool NonZero = false;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -12747,6 +12650,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
+ LastIntegerTrunc = false;
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
@@ -12758,6 +12662,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
FirstStoreAS, FirstStoreAlign, &IsFast) &&
IsFast) {
+ LastIntegerTrunc = true;
LastLegalType = i + 1;
}
}
@@ -12787,8 +12692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
- bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
- true, UseVector);
+ bool Merged = MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
if (!Merged) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
@@ -12836,7 +12741,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
bool Merged = MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumStoresToMerge, false, true);
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
if (!Merged) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
@@ -12881,11 +12786,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getMemoryVT() != MemVT)
break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
// If this is not the first ptr that we check.
- if (LdBasePtr.Base.getNode()) {
+ int64_t LdOffset = 0;
+ if (LdBasePtr.getBase().getNode()) {
// The base ptr must be the same.
- if (!LdPtr.equalBaseIndex(LdBasePtr))
+ if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
break;
} else {
// Check that all other base pointers are the same as this one.
@@ -12893,7 +12799,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
+ LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
if (LoadNodes.size() < 2) {
@@ -12919,10 +12825,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Scan the memory operations on the chain and find the first
// non-consecutive load memory address. These variables hold the index in
// the store node array.
- unsigned LastConsecutiveLoad = 0;
+ unsigned LastConsecutiveLoad = 1;
// This variable refers to the size, not the index, in the array.
- unsigned LastLegalVectorType = 0;
- unsigned LastLegalIntegerType = 0;
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool DoIntegerTruncate = false;
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
@@ -12958,11 +12865,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
- IsFastLd)
+ IsFastLd) {
LastLegalIntegerType = i + 1;
- // Or check whether a truncstore and extload is legal.
- else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) &&
@@ -12976,8 +12884,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
- IsFastLd)
+ IsFastLd) {
LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = true;
+ }
}
}
@@ -13012,17 +12922,31 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// The merged loads are required to have the same incoming chain, so
// using the first's chain is acceptable.
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign);
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
-
AddToWorklist(NewStoreChain.getNode());
- SDValue NewStore = DAG.getStore(
- NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign);
+ NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
+ } else { // This must be the truncstore/extload case
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad =
+ DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+ JointMemOpVT, FirstLoadAlign);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
@@ -13285,7 +13209,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
- if (!LegalTypes) {
+ if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
+ : !LegalTypes) {
for (;;) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
@@ -14035,6 +13960,11 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// when we start sorting the vectors by type.
return SDValue();
}
+ } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
+ InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+ ConcatOps[0] = VecIn2;
+ VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
@@ -16610,11 +16540,11 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
// Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
- if (BasePtr0.equalBaseIndex(BasePtr1))
- return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) ||
- (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset));
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr());
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr());
+ int64_t PtrDiff;
+ if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+ return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
// FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
// modified to use BaseIndexOffset.
@@ -16821,14 +16751,14 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
- if (!BasePtr.Base.getNode())
+ if (!BasePtr.getBase().getNode())
return false;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.isUndef())
+ if (BasePtr.getBase().isUndef())
return false;
SmallVector<StoreSDNode *, 8> ChainedStores;
@@ -16847,10 +16777,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
// Check that the base pointer is the same as the original one.
- if (!Ptr.equalBaseIndex(BasePtr))
+ if (!BasePtr.equalBaseIndex(Ptr, DAG))
break;
// Walk up the chain to find the next store node, ignoring any
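
The updated isAlias check works entirely in the relative frame produced by
equalBaseIndex: PtrDiff is the signed byte distance from Op0's address to
Op1's, so the two accesses are disjoint exactly when one interval ends at or
before the start of the other. A self-contained check of just that interval
arithmetic (not the DAG API):

    #include <cassert>
    #include <cstdint>

    // Accesses cover [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1)
    // relative to the shared (base, index) pair.
    static bool mayAlias(int64_t PtrDiff, int64_t NumBytes0, int64_t NumBytes1) {
      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
    }

    int main() {
      assert(!mayAlias(4, 4, 4));  // [0,4) vs [4,8): adjacent, disjoint
      assert(mayAlias(2, 4, 4));   // [0,4) vs [2,6): overlap
      assert(!mayAlias(-8, 4, 4)); // [0,4) vs [-8,-4): disjoint
      return 0;
    }
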
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 15e87b7af18dc..873b2bd48f1e0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3530,17 +3530,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
- // The high part is obtained by SRA'ing all but one of the bits of low
- // part.
- unsigned LoSize = VT.getSizeInBits();
- SDValue HiLHS =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
- SDValue HiRHS =
- DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (isSigned) {
+ // The high part is obtained by SRA'ing all but one of the bits of low
+ // part.
+ unsigned LoSize = VT.getSizeInBits();
+ HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
// Here we're passing the 2 arguments explicitly as 4 arguments that are
// pre-lowered to the correct types. This all depends upon WideVT not
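
The expansion feeds the libcall the two operands pre-split into (hi, lo)
halves, so the high halves must match the extension implied by the
operation's signedness: sign replication for the signed case, plain zero for
the unsigned one, which the old unconditional SRA got wrong. The same
arithmetic in scalar code (a sketch of the math, not the DAG calls):

    #include <cassert>
    #include <cstdint>

    // High 32 bits of the 64-bit extension of a 32-bit value.
    static int32_t hiPartSigned(int32_t lo) {
      // Arithmetic shift by width-1 replicates the sign bit.
      return lo >> 31;
    }
    static uint32_t hiPartUnsigned(uint32_t) { return 0; } // zero extension

    int main() {
      assert(hiPartSigned(-1) == -1);           // all ones
      assert(hiPartSigned(42) == 0);
      assert(hiPartUnsigned(0xFFFFFFFFu) == 0); // never sign bits
      return 0;
    }
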
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a3ba52a148ee4..75fec7bd1d485 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -615,9 +615,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS,
N->getOperand(2));
- assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
// Convert to the expected type.
- return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+ return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
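
getSExtOrTrunc picks the direction from the types: it sign-extends when the
requested type is wider than the promoted SETCC result, truncates when it is
narrower, and is a no-op when they match, which is why the bitsLE assertion
could be dropped. A scalar model of that behavior (assumed 8/16-bit stand-ins
for the EVTs):

    #include <cassert>
    #include <cstdint>

    static int16_t toWider(int8_t v) { return static_cast<int16_t>(v); }   // sext
    static int8_t toNarrower(int16_t v) { return static_cast<int8_t>(v); } // trunc

    int main() {
      assert(toWider(int8_t(-1)) == int16_t(-1));       // sign bits preserved
      assert(toNarrower(int16_t(0x0100)) == int8_t(0)); // high bits discarded
      return 0;
    }
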
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 593efc5121f90..70b1fa77a0991 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1861,28 +1861,68 @@ static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
- unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
- if (SethiUllmanNumber != 0)
- return SethiUllmanNumber;
-
- unsigned Extra = 0;
- for (const SDep &Pred : SU->Preds) {
- if (Pred.isCtrl()) continue; // ignore chain preds
- SUnit *PredSU = Pred.getSUnit();
- unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
- if (PredSethiUllman > SethiUllmanNumber) {
- SethiUllmanNumber = PredSethiUllman;
- Extra = 0;
- } else if (PredSethiUllman == SethiUllmanNumber)
- ++Extra;
- }
+ if (SUNumbers[SU->NodeNum] != 0)
+ return SUNumbers[SU->NodeNum];
+
+ // Use WorkList to avoid stack overflow on excessively large IRs.
+ struct WorkState {
+ WorkState(const SUnit *SU) : SU(SU) {}
+ const SUnit *SU;
+ unsigned PredsProcessed = 0;
+ };
- SethiUllmanNumber += Extra;
+ SmallVector<WorkState, 16> WorkList;
+ WorkList.push_back(SU);
+ while (!WorkList.empty()) {
+ auto &Temp = WorkList.back();
+ auto *TempSU = Temp.SU;
+ bool AllPredsKnown = true;
+ // Try to find a non-evaluated pred and push it into the processing stack.
+ for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
+ auto &Pred = TempSU->Preds[P];
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (SUNumbers[PredSU->NodeNum] == 0) {
+#ifndef NDEBUG
+        // In debug mode, check that we don't have such an element on the stack.
+ for (auto It : WorkList)
+ assert(It.SU != PredSU && "Trying to push an element twice?");
+#endif
+        // Next time, resume processing this node from the next pred.
+ Temp.PredsProcessed = P + 1;
+ WorkList.push_back(PredSU);
+ AllPredsKnown = false;
+ break;
+ }
+ }
- if (SethiUllmanNumber == 0)
- SethiUllmanNumber = 1;
+ if (!AllPredsKnown)
+ continue;
- return SethiUllmanNumber;
+ // Once all preds are known, we can calculate the answer for this one.
+ unsigned SethiUllmanNumber = 0;
+ unsigned Extra = 0;
+ for (const SDep &Pred : TempSU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
+ assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+ SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
+ WorkList.pop_back();
+ }
+
+ assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
+ return SUNumbers[SU->NodeNum];
}
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
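
The recursion-to-worklist rewrite above follows a standard shape: keep an
explicit stack of (node, next-unvisited-pred) frames, descend into the first
unevaluated predecessor, and fold a node only once all of its predecessors
have values. The same shape on a toy predecessor graph (not the scheduler's
SUnit types):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // num[n] == 0 means "not yet evaluated", mirroring SUNumbers.
    static unsigned sethiUllman(const std::vector<std::vector<int>> &preds,
                                std::vector<unsigned> &num, int root) {
      struct Frame { int node; std::size_t nextPred; };
      std::vector<Frame> stack{{root, 0}};
      while (!stack.empty()) {
        Frame &f = stack.back();
        bool allKnown = true;
        for (std::size_t p = f.nextPred; p < preds[f.node].size(); ++p) {
          int pred = preds[f.node][p];
          if (num[pred] == 0) {   // descend into an unevaluated pred
            f.nextPred = p + 1;   // resume after it next time
            stack.push_back({pred, 0});
            allKnown = false;
            break;
          }
        }
        if (!allKnown)
          continue;
        unsigned best = 0, extra = 0; // fold, as in the scalar loop above
        for (int pred : preds[f.node]) {
          if (num[pred] > best) { best = num[pred]; extra = 0; }
          else if (num[pred] == best) ++extra;
        }
        best += extra;
        num[f.node] = best ? best : 1;
        stack.pop_back();
      }
      return num[root];
    }

    int main() {
      // Node 2 has preds {0, 1}: the leaves score 1, the join 1 + 1.
      std::vector<std::vector<int>> preds = {{}, {}, {0, 1}};
      std::vector<unsigned> num(3, 0);
      assert(sethiUllman(preds, num, 2) == 2);
      return 0;
    }
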
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7abdc76cb004f..98553152117d1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// TODO: In the AlwaysInline case, if the size is big then generate a loop
// rather than maybe a humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
- DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
+ DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
@@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME does the case above also need this?
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ EVT NVT = TLI.getTypeToTransformTo(C, VT);
assert(NVT.bitsGE(VT));
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
- MinAlign(SrcAlign, SrcOff), MMOFlags);
+ MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
OutChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
@@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value =
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
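
Both lowerings now ask, per emitted chunk, whether the source pointer plus
the running offset is known dereferenceable, and widen only that load's
memory-operand flags; the caller-supplied flags pass through untouched. A
minimal model of the flag composition (a toy enum mirroring the
MachineMemOperand::Flags names):

    #include <cassert>
    #include <cstdint>

    enum MemFlags : uint32_t {
      MOVolatile = 1u << 0,       // caller-supplied, shared by all chunks
      MODereferenceable = 1u << 1 // added per chunk when provable
    };

    int main() {
      uint32_t MMOFlags = MOVolatile;
      bool isDereferenceable = true; // result of the per-offset query
      uint32_t SrcMMOFlags = MMOFlags;
      if (isDereferenceable)
        SrcMMOFlags |= MODereferenceable;
      assert(SrcMMOFlags == (MOVolatile | MODereferenceable));
      return 0;
    }
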
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
new file mode 100644
index 0000000000000..d2e0dbbf88ecd
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -0,0 +1,95 @@
+//===- SelectionDAGAddressAnalysis.cpp - DAG Address Analysis ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
+ const SelectionDAG &DAG, int64_t &Off) {
+  // Obviously equivalent case
+ Off = Other.Offset - Offset;
+ if (Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt)
+ return true;
+
+ // Match GlobalAddresses
+ if (Index == Other.Index)
+ if (GlobalAddressSDNode *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (GlobalAddressSDNode *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if (A->getGlobal() == B->getGlobal()) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+
+ // TODO: we should be able to add FrameIndex analysis improvements here.
+
+ return false;
+}
+
+/// Parses the tree in \p Ptr for base, index, and offset addresses.
+BaseIndexOffset BaseIndexOffset::match(SDValue Ptr) {
+ // (((B + I*M) + c)) + c ...
+ SDValue Base = Ptr;
+ SDValue Index = SDValue();
+ int64_t Offset = 0;
+ bool IsIndexSignExt = false;
+
+ // Consume constant adds
+ while (Base->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Base->getOperand(1))) {
+ int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue();
+ Offset += POffset;
+ Base = Base->getOperand(0);
+ }
+
+ if (Base->getOpcode() == ISD::ADD) {
+ // TODO: The following code appears to be needless as it just
+ // bails on some Ptrs early, reducing the cases where we
+ // find equivalence. We should be able to remove this.
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Base is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Base->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ Index = Base->getOperand(1);
+ SDValue PotentialBase = Base->getOperand(0);
+
+ // Skip signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+    // Check for an (Index + Offset) pattern
+ if (Index->getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Index->getOperand(1)))
+ return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
+
+ Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue();
+ Index = Index->getOperand(0);
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else
+ IsIndexSignExt = false;
+ Base = PotentialBase;
+ }
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+}
+} // end namespace llvm
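
The heart of match() is the leading loop that peels nested constant adds into
a running offset; everything after it only classifies the remaining
non-constant structure. The same peeling on a standalone toy tree (Node is an
invented stand-in for SDValue):

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // A leaf base, or (lhs + constant).
    struct Node {
      const Node *lhs = nullptr;
      int64_t constant = 0;
      bool isAddConstant() const { return lhs != nullptr; }
    };

    // Mirrors the "consume constant adds" loop: peel (((B + c1) + c2) + c3)
    // down to base B with offset c1 + c2 + c3.
    static std::pair<const Node *, int64_t> matchBasePlusOffset(const Node *p) {
      int64_t offset = 0;
      while (p->isAddConstant()) {
        offset += p->constant;
        p = p->lhs;
      }
      return {p, offset};
    }

    int main() {
      Node base;
      Node inner{&base, 8};   // base + 8
      Node outer{&inner, -4}; // (base + 8) - 4
      auto result = matchBasePlusOffset(&outer);
      assert(result.first == &base && result.second == 4);
      return 0;
    }
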
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index dcccd17bb98ea..f711ca71f79fe 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -337,12 +337,13 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
/// may trap on it. In this case we have to split the edge so that the path
/// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction. If available, we pass a
-/// dominator tree to be updated when we split critical edges. This is because
-/// SelectionDAGISel preserves the DominatorTree.
+/// execute the possibly trapping instruction. If available, we pass domtree
+/// and loop info to be updated when we split critical edges. This is because
+/// SelectionDAGISel preserves these analyses.
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) {
+static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
+ LoopInfo *LI) {
// Loop for blocks with phi nodes.
for (BasicBlock &BB : Fn) {
PHINode *PN = dyn_cast<PHINode>(BB.begin());
@@ -368,7 +369,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) {
// Okay, we have to split this edge.
SplitCriticalEdge(
Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
- CriticalEdgeSplittingOptions(DT).setMergeIdenticalEdges());
+ CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -406,10 +407,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT);
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cfda0fffd031a..8652df7bbd706 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -365,10 +365,10 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
- if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue())
+ if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
return false;
- if (C.intersects(~Demanded)) {
+ if (!C.isSubsetOf(Demanded)) {
EVT VT = Op.getValueType();
SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
@@ -919,7 +919,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (NewMask == 1)
+ if (NewMask.isOneValue())
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -1349,7 +1349,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
- return CVal == 1;
+ return CVal.isOneValue();
case ZeroOrNegativeOneBooleanContent:
return CVal.isAllOnesValue();
}
@@ -1506,7 +1506,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
- if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
const APInt &ShAmt
@@ -1666,7 +1666,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
- if ((newMask & Mask) == Mask) {
+ if (Mask.isSubsetOf(newMask)) {
if (DAG.getDataLayout().isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
else
@@ -1785,12 +1785,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ExtSrcTyBits),
dl, ExtDstTy),
Cond);
- } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ } else if ((N1C->isNullValue() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
- bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
@@ -1807,7 +1807,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
// can only do this if the top bits are known zero.
unsigned BitWidth = N0.getValueSizeInBits();
@@ -1830,7 +1830,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
- } else if (N1C->getAPIntValue() == 1 &&
+ } else if (N1C->isOne() &&
(VT == MVT::i1 ||
getBooleanContents(N0->getValueType(0)) ==
ZeroOrOneBooleanContent)) {
@@ -1848,7 +1848,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (Op0.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -2482,7 +2482,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
- Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
SDLoc(C), MVT::i64));
}
return;
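
The mask rewrites in ShrinkDemandedConstant rest on two set identities:
(C | ~D) is all-ones iff every demanded bit in D is already set in C, i.e.
D is a subset of C, and C intersects ~D iff C is not a subset of D. An
exhaustive 8-bit verification of both equivalences:

    #include <cassert>

    static bool isSubsetOf(unsigned a, unsigned b) { return (a & ~b) == 0; }

    int main() {
      for (unsigned C = 0; C < 256; ++C)
        for (unsigned D = 0; D < 256; ++D) {
          bool allOnes = ((C | (~D & 0xFFu)) & 0xFFu) == 0xFFu;
          assert(allOnes == isSubsetOf(D, C));     // the XOR guard rewrite
          bool intersects = (C & (~D & 0xFFu)) != 0;
          assert(intersects == !isSubsetOf(C, D)); // the shrink-test rewrite
        }
      return 0;
    }
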
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index c8537ad2f3130..eeb00a784b0d9 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,17 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <utility>
#define DEBUG_TYPE "target-reg-info"
@@ -38,7 +48,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
CoveringLanes(SRICoveringLanes) {
}
-TargetRegisterInfo::~TargetRegisterInfo() {}
+TargetRegisterInfo::~TargetRegisterInfo() = default;
void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg)
const {
@@ -126,7 +136,7 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-} // End of llvm namespace
+} // end namespace llvm
/// getAllocatableClass - Return the maximal subclass of the given register
/// class that is allocatable, or NULL.
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index 82e85bab14747..f6d5bc80ddffb 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <string>
+
using namespace llvm;
-//---------------------------------------------------------------------------
-// TargetSubtargetInfo Class
-//
TargetSubtargetInfo::TargetSubtargetInfo(
const Triple &TT, StringRef CPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
@@ -29,7 +31,7 @@ TargetSubtargetInfo::TargetSubtargetInfo(
: MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
}
-TargetSubtargetInfo::~TargetSubtargetInfo() {}
+TargetSubtargetInfo::~TargetSubtargetInfo() = default;
bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index f916695a84392..b94bb0c80c793 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -24,11 +24,10 @@ add_llvm_library(LLVMDebugInfoCodeView
SymbolRecordMapping.cpp
SymbolDumper.cpp
SymbolSerializer.cpp
- TypeDatabase.cpp
- TypeDatabaseVisitor.cpp
TypeDumpVisitor.cpp
TypeIndex.cpp
TypeIndexDiscovery.cpp
+ TypeName.cpp
TypeRecordMapping.cpp
TypeSerializer.cpp
TypeStreamMerger.cpp
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index f0debd9e97023..22f166a2335d6 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -12,8 +12,6 @@
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index 334c5e002bbca..d69eca018e0c1 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -53,12 +53,16 @@ DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder(
std::shared_ptr<DebugSubsection> Subsection, CodeViewContainer Container)
: Subsection(std::move(Subsection)), Container(Container) {}
+DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder(
+ const DebugSubsectionRecord &Contents, CodeViewContainer Container)
+ : Contents(Contents), Container(Container) {}
+
uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() {
- // The length of the entire subsection is always padded to 4 bytes, regardless
- // of the container kind.
- uint32_t Size = sizeof(DebugSubsectionHeader) +
- alignTo(Subsection->calculateSerializedSize(), 4);
- return Size;
+ uint32_t DataSize = Subsection ? Subsection->calculateSerializedSize()
+ : Contents.getRecordData().getLength();
+ // The length of the entire subsection is always padded to 4 bytes,
+ // regardless of the container kind.
+ return sizeof(DebugSubsectionHeader) + alignTo(DataSize, 4);
}
Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const {
@@ -66,16 +70,22 @@ Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const {
"Debug Subsection not properly aligned");
DebugSubsectionHeader Header;
- Header.Kind = uint32_t(Subsection->kind());
+ Header.Kind = uint32_t(Subsection ? Subsection->kind() : Contents.kind());
// The value written into the Header's Length field is only padded to the
// container's alignment
- Header.Length =
- alignTo(Subsection->calculateSerializedSize(), alignOf(Container));
+ uint32_t DataSize = Subsection ? Subsection->calculateSerializedSize()
+ : Contents.getRecordData().getLength();
+ Header.Length = alignTo(DataSize, alignOf(Container));
if (auto EC = Writer.writeObject(Header))
return EC;
- if (auto EC = Subsection->commit(Writer))
- return EC;
+ if (Subsection) {
+ if (auto EC = Subsection->commit(Writer))
+ return EC;
+ } else {
+ if (auto EC = Writer.writeStreamRef(Contents.getRecordData()))
+ return EC;
+ }
if (auto EC = Writer.padToAlignment(4))
return EC;
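
After this change the builder serializes from one of two sources fixed at
construction: an owned DebugSubsection that can compute its own size and
kind, or a borrowed, already-serialized DebugSubsectionRecord copied through
verbatim; every query branches on which is set. The shape of that pattern in
standalone form (toy types, not the CodeView classes):

    #include <cassert>
    #include <cstddef>
    #include <memory>
    #include <string>

    struct OwnedSection { std::string bytes; };   // can compute its size
    struct BorrowedRecord { std::string bytes; }; // pre-serialized payload

    struct Builder {
      std::shared_ptr<OwnedSection> Subsection; // set by one constructor
      BorrowedRecord Contents;                  // set by the other

      // Same branch as calculateSerializedLength() and commit().
      std::size_t dataSize() const {
        return Subsection ? Subsection->bytes.size() : Contents.bytes.size();
      }
    };

    int main() {
      Builder owned{std::make_shared<OwnedSection>(OwnedSection{"abcd"}), {}};
      Builder borrowed{nullptr, BorrowedRecord{"xy"}};
      assert(owned.dataSize() == 4);
      assert(borrowed.dataSize() == 2);
      return 0;
    }
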
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index 01d8ccf2d31e8..ec00af28395e5 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -82,6 +82,13 @@ static const EnumEntry<uint16_t> RegisterNames[] = {
CV_ENUM_CLASS_ENT(RegisterId, R15),
};
+static const EnumEntry<uint32_t> PublicSymFlagNames[] = {
+ CV_ENUM_CLASS_ENT(PublicSymFlags, Code),
+ CV_ENUM_CLASS_ENT(PublicSymFlags, Function),
+ CV_ENUM_CLASS_ENT(PublicSymFlags, Managed),
+ CV_ENUM_CLASS_ENT(PublicSymFlags, MSIL),
+};
+
static const EnumEntry<uint8_t> ProcSymFlagNames[] = {
CV_ENUM_CLASS_ENT(ProcSymFlags, HasFP),
CV_ENUM_CLASS_ENT(ProcSymFlags, HasIRET),
@@ -338,6 +345,9 @@ ArrayRef<EnumEntry<uint16_t>> getRegisterNames() {
return makeArrayRef(RegisterNames);
}
+ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames() {
+ return makeArrayRef(PublicSymFlagNames);
+}
ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames() {
return makeArrayRef(ProcSymFlagNames);
}
diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index 39eb4099ce9e6..20f7e72c3af39 100644
--- a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -11,7 +11,7 @@
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
+#include "llvm/DebugInfo/CodeView/TypeName.h"
#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
@@ -31,15 +31,13 @@ LazyRandomTypeCollection::LazyRandomTypeCollection(uint32_t RecordCountHint)
LazyRandomTypeCollection::LazyRandomTypeCollection(
const CVTypeArray &Types, uint32_t RecordCountHint,
PartialOffsetArray PartialOffsets)
- : Database(RecordCountHint), Types(Types), DatabaseVisitor(Database),
- PartialOffsets(PartialOffsets) {
- KnownOffsets.resize(Database.capacity());
+ : NameStorage(Allocator), Types(Types), PartialOffsets(PartialOffsets) {
+ Records.resize(RecordCountHint);
}
LazyRandomTypeCollection::LazyRandomTypeCollection(ArrayRef<uint8_t> Data,
uint32_t RecordCountHint)
: LazyRandomTypeCollection(RecordCountHint) {
- reset(Data);
+ reset(Data, RecordCountHint);
}
LazyRandomTypeCollection::LazyRandomTypeCollection(StringRef Data,
@@ -52,50 +50,88 @@ LazyRandomTypeCollection::LazyRandomTypeCollection(const CVTypeArray &Types,
uint32_t NumRecords)
: LazyRandomTypeCollection(Types, NumRecords, PartialOffsetArray()) {}
-void LazyRandomTypeCollection::reset(StringRef Data) {
- reset(makeArrayRef(Data.bytes_begin(), Data.bytes_end()));
-}
-
-void LazyRandomTypeCollection::reset(ArrayRef<uint8_t> Data) {
+void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) {
+ Count = 0;
PartialOffsets = PartialOffsetArray();
BinaryStreamReader Reader(Data, support::little);
error(Reader.readArray(Types, Reader.getLength()));
- KnownOffsets.resize(Database.capacity());
+ // Clear and then resize, to make sure existing data gets destroyed.
+ Records.clear();
+ Records.resize(RecordCountHint);
+}
+
+void LazyRandomTypeCollection::reset(ArrayRef<uint8_t> Data,
+ uint32_t RecordCountHint) {
+ reset(toStringRef(Data), RecordCountHint);
+}
+
+uint32_t LazyRandomTypeCollection::getOffsetOfType(TypeIndex Index) {
+ error(ensureTypeExists(Index));
+ assert(contains(Index));
+
+ return Records[Index.toArrayIndex()].Offset;
}
CVType LazyRandomTypeCollection::getType(TypeIndex Index) {
error(ensureTypeExists(Index));
- return Database.getTypeRecord(Index);
+ assert(contains(Index));
+
+ return Records[Index.toArrayIndex()].Type;
}
StringRef LazyRandomTypeCollection::getTypeName(TypeIndex Index) {
- if (!Index.isSimple()) {
- // Try to make sure the type exists. Even if it doesn't though, it may be
- // because we're dumping a symbol stream with no corresponding type stream
- // present, in which case we still want to be able to print <unknown UDT>
- // for the type names.
- consumeError(ensureTypeExists(Index));
+ if (Index.isNoneType() || Index.isSimple())
+ return TypeIndex::simpleTypeName(Index);
+
+ // Try to make sure the type exists. Even if it doesn't though, it may be
+ // because we're dumping a symbol stream with no corresponding type stream
+ // present, in which case we still want to be able to print <unknown UDT>
+ // for the type names.
+ if (auto EC = ensureTypeExists(Index)) {
+ consumeError(std::move(EC));
+ return "<unknown UDT>";
}
- return Database.getTypeName(Index);
+ uint32_t I = Index.toArrayIndex();
+ ensureCapacityFor(Index);
+ if (Records[I].Name.data() == nullptr) {
+ StringRef Result = NameStorage.save(computeTypeName(*this, Index));
+ Records[I].Name = Result;
+ }
+ return Records[I].Name;
}
bool LazyRandomTypeCollection::contains(TypeIndex Index) {
- return Database.contains(Index);
+ if (Records.size() <= Index.toArrayIndex())
+ return false;
+ if (!Records[Index.toArrayIndex()].Type.valid())
+ return false;
+ return true;
}
-uint32_t LazyRandomTypeCollection::size() { return Database.size(); }
+uint32_t LazyRandomTypeCollection::size() { return Count; }
-uint32_t LazyRandomTypeCollection::capacity() { return Database.capacity(); }
+uint32_t LazyRandomTypeCollection::capacity() { return Records.size(); }
Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) {
- if (!Database.contains(TI)) {
- if (auto EC = visitRangeForType(TI))
- return EC;
- }
- return Error::success();
+ if (contains(TI))
+ return Error::success();
+
+ return visitRangeForType(TI);
+}
+
+void LazyRandomTypeCollection::ensureCapacityFor(TypeIndex Index) {
+ uint32_t MinSize = Index.toArrayIndex() + 1;
+
+ if (MinSize <= capacity())
+ return;
+
+ uint32_t NewCapacity = MinSize * 3 / 2;
+
+ assert(NewCapacity > capacity());
+ Records.resize(NewCapacity);
}
Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
@@ -111,7 +147,7 @@ Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
auto Prev = std::prev(Next);
TypeIndex TIB = Prev->Type;
- if (Database.contains(TIB)) {
+ if (contains(TIB)) {
// They've asked us to fetch a type index, but the entry we found in the
// partial offsets array has already been visited. Since we visit an entire
// block every time, that means this record should have been previously
@@ -122,13 +158,12 @@ Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
TypeIndex TIE;
if (Next == PartialOffsets.end()) {
- TIE = TypeIndex::fromArrayIndex(Database.capacity());
+ TIE = TypeIndex::fromArrayIndex(capacity());
} else {
TIE = Next->Type;
}
- if (auto EC = visitRange(TIB, Prev->Offset, TIE))
- return EC;
+ visitRange(TIB, Prev->Offset, TIE);
return Error::success();
}
@@ -157,34 +192,31 @@ Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) {
assert(PartialOffsets.empty());
TypeIndex CurrentTI = TypeIndex::fromArrayIndex(0);
- uint32_t Offset = 0;
auto Begin = Types.begin();
- if (!Database.empty()) {
+ if (Count > 0) {
// In the case of type streams whose record count we don't know up front,
// a lookup can trigger a full scan, additional records can then be
// appended (we only learn how many there are once a full visitation
// finishes), and a lookup of one of the new types can trigger yet
// another full scan. To avoid this, we assume that if the
- // database has some records, this must be what's going on. So we ask the
- // database for the largest type index less than the one we're searching for
- // and only do the forward scan from there.
- auto Prev = Database.largestTypeIndexLessThan(TI);
- assert(Prev.hasValue() && "Empty database with valid types?");
- Offset = KnownOffsets[Prev->toArrayIndex()];
- CurrentTI = *Prev;
- ++CurrentTI;
+ // database has some records, this must be what's going on. We can also
+ // assume that this index must be larger than the largest type index we've
+ // visited, so we start from there and scan forward.
+ uint32_t Offset = Records[LargestTypeIndex.toArrayIndex()].Offset;
+ CurrentTI = LargestTypeIndex + 1;
Begin = Types.at(Offset);
++Begin;
- Offset = Begin.offset();
}
auto End = Types.end();
while (Begin != End) {
- if (auto EC = visitOneRecord(CurrentTI, Offset, *Begin))
- return EC;
-
- Offset += Begin.getRecordLength();
+ ensureCapacityFor(CurrentTI);
+ LargestTypeIndex = std::max(LargestTypeIndex, CurrentTI);
+ auto Idx = CurrentTI.toArrayIndex();
+ Records[Idx].Type = *Begin;
+ Records[Idx].Offset = Begin.offset();
+ ++Count;
++Begin;
++CurrentTI;
}
@@ -194,36 +226,19 @@ Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) {
return Error::success();
}
-Error LazyRandomTypeCollection::visitRange(TypeIndex Begin,
- uint32_t BeginOffset,
- TypeIndex End) {
-
+void LazyRandomTypeCollection::visitRange(TypeIndex Begin, uint32_t BeginOffset,
+ TypeIndex End) {
auto RI = Types.at(BeginOffset);
assert(RI != Types.end());
+ ensureCapacityFor(End);
while (Begin != End) {
- if (auto EC = visitOneRecord(Begin, BeginOffset, *RI))
- return EC;
-
- BeginOffset += RI.getRecordLength();
+ LargestTypeIndex = std::max(LargestTypeIndex, Begin);
+ auto Idx = Begin.toArrayIndex();
+ Records[Idx].Type = *RI;
+ Records[Idx].Offset = RI.offset();
+ ++Count;
++Begin;
++RI;
}
-
- return Error::success();
-}
-
-Error LazyRandomTypeCollection::visitOneRecord(TypeIndex TI, uint32_t Offset,
- CVType &Record) {
- assert(!Database.contains(TI));
- if (auto EC = codeview::visitTypeRecord(Record, TI, DatabaseVisitor))
- return EC;
- // Keep the KnownOffsets array the same size as the Database's capacity. Since
- // we don't always know how many records are in the type stream, we need to be
- // prepared for the database growing and receicing a type index that can't fit
- // in our current buffer.
- if (KnownOffsets.size() < Database.capacity())
- KnownOffsets.resize(Database.capacity());
- KnownOffsets[TI.toArrayIndex()] = Offset;
- return Error::success();
}
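
The rewritten collection replaces the external TypeDatabase with one Records vector that is grown on demand; ensureCapacityFor resizes to 3/2 of the requested minimum so that repeated random accesses don't reallocate every time. A sketch of just that growth policy (CachedRecord stands in for the real cached-entry type):

    #include <cstdint>
    #include <vector>

    struct CachedRecord {
      uint32_t Offset = 0;
      bool Valid = false; // stands in for Records[I].Type.valid()
    };

    // Mirrors ensureCapacityFor: grow to at least Index + 1 slots, with
    // 3/2 headroom so a sequence of growing indices amortizes well.
    static void ensureCapacityFor(std::vector<CachedRecord> &Records,
                                  uint32_t Index) {
      uint32_t MinSize = Index + 1;
      if (MinSize <= Records.size())
        return;
      Records.resize(MinSize * 3 / 2);
    }

    int main() {
      std::vector<CachedRecord> Records(10);
      ensureCapacityFor(Records, 100); // resizes to 151 slots
      Records[100].Valid = true;
    }
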
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 36abafc079edb..b9fa9b6a6ad7e 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -524,7 +524,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) {
DictScope S(W, "PublicSym");
- printTypeIndex("Type", Public.Index);
+ W.printFlags("Flags", uint32_t(Public.Flags), getPublicSymFlagNames());
W.printNumber("Seg", Public.Segment);
W.printNumber("Off", Public.Offset);
W.printString("Name", Public.Name);
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index d731dc1b0a372..923837a45d9fc 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -361,7 +361,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
PublicSym32 &Public) {
- error(IO.mapInteger(Public.Index));
+ error(IO.mapEnum(Public.Flags));
error(IO.mapInteger(Public.Offset));
error(IO.mapInteger(Public.Segment));
error(IO.mapStringZ(Public.Name));
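
Both PublicSym32 changes reflect the record's actual on-disk shape: S_PUB32 begins with a 32-bit flags word, not a type index. A sketch of the fixed-length prefix as a plain struct; the real code maps each field through CodeViewRecordIO as shown above, and on disk every field is little-endian:

    #include <cstdint>

    // Fixed-length prefix of an S_PUB32 record (sketch; plain types are
    // used here in place of endian-aware ones).
    struct PublicSym32Layout {
      uint32_t Flags;   // PublicSymFlags: Code, Function, Managed, MSIL
      uint32_t Offset;  // offset of the symbol within its segment
      uint16_t Segment; // section/segment index
      // ...followed by a zero-terminated name string.
    };
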
diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp
deleted file mode 100644
index 08f848b36a9d5..0000000000000
--- a/lib/DebugInfo/CodeView/TypeDatabase.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-//===- TypeDatabase.cpp --------------------------------------- *- C++ --*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-TypeDatabase::TypeDatabase(uint32_t Capacity) : TypeNameStorage(Allocator) {
- CVUDTNames.resize(Capacity);
- TypeRecords.resize(Capacity);
- ValidRecords.resize(Capacity);
-}
-
-TypeIndex TypeDatabase::appendType(StringRef Name, const CVType &Data) {
- LargestTypeIndex = getAppendIndex();
- if (LargestTypeIndex.toArrayIndex() >= capacity())
- grow();
- recordType(Name, LargestTypeIndex, Data);
- return LargestTypeIndex;
-}
-
-void TypeDatabase::recordType(StringRef Name, TypeIndex Index,
- const CVType &Data) {
- LargestTypeIndex = empty() ? Index : std::max(Index, LargestTypeIndex);
-
- if (LargestTypeIndex.toArrayIndex() >= capacity())
- grow(Index);
-
- uint32_t AI = Index.toArrayIndex();
-
- assert(!contains(Index));
- assert(AI < capacity());
-
- CVUDTNames[AI] = Name;
- TypeRecords[AI] = Data;
- ValidRecords.set(AI);
- ++Count;
-}
-
-/// Saves the name in a StringSet and creates a stable StringRef.
-StringRef TypeDatabase::saveTypeName(StringRef TypeName) {
- return TypeNameStorage.save(TypeName);
-}
-
-StringRef TypeDatabase::getTypeName(TypeIndex Index) const {
- if (Index.isNoneType() || Index.isSimple())
- return TypeIndex::simpleTypeName(Index);
-
- if (contains(Index))
- return CVUDTNames[Index.toArrayIndex()];
-
- return "<unknown UDT>";
-}
-
-const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const {
- assert(contains(Index));
- return TypeRecords[Index.toArrayIndex()];
-}
-
-CVType &TypeDatabase::getTypeRecord(TypeIndex Index) {
- assert(contains(Index));
- return TypeRecords[Index.toArrayIndex()];
-}
-
-bool TypeDatabase::contains(TypeIndex Index) const {
- uint32_t AI = Index.toArrayIndex();
- if (AI >= capacity())
- return false;
-
- return ValidRecords.test(AI);
-}
-
-uint32_t TypeDatabase::size() const { return Count; }
-
-uint32_t TypeDatabase::capacity() const { return TypeRecords.size(); }
-
-CVType TypeDatabase::getType(TypeIndex Index) { return getTypeRecord(Index); }
-
-StringRef TypeDatabase::getTypeName(TypeIndex Index) {
- return static_cast<const TypeDatabase *>(this)->getTypeName(Index);
-}
-
-bool TypeDatabase::contains(TypeIndex Index) {
- return static_cast<const TypeDatabase *>(this)->contains(Index);
-}
-
-uint32_t TypeDatabase::size() {
- return static_cast<const TypeDatabase *>(this)->size();
-}
-
-uint32_t TypeDatabase::capacity() {
- return static_cast<const TypeDatabase *>(this)->capacity();
-}
-
-void TypeDatabase::grow() { grow(LargestTypeIndex + 1); }
-
-void TypeDatabase::grow(TypeIndex NewIndex) {
- uint32_t NewSize = NewIndex.toArrayIndex() + 1;
-
- if (NewSize <= capacity())
- return;
-
- uint32_t NewCapacity = NewSize * 3 / 2;
-
- TypeRecords.resize(NewCapacity);
- CVUDTNames.resize(NewCapacity);
- ValidRecords.resize(NewCapacity);
-}
-
-bool TypeDatabase::empty() const { return size() == 0; }
-
-Optional<TypeIndex> TypeDatabase::largestTypeIndexLessThan(TypeIndex TI) const {
- uint32_t AI = TI.toArrayIndex();
- int N = ValidRecords.find_prev(AI);
- if (N == -1)
- return None;
- return TypeIndex::fromArrayIndex(N);
-}
-
-TypeIndex TypeDatabase::getAppendIndex() const {
- if (empty())
- return TypeIndex::fromArrayIndex(0);
-
- return LargestTypeIndex + 1;
-}
-
-Optional<TypeIndex> TypeDatabase::getFirst() {
- int N = ValidRecords.find_first();
- if (N == -1)
- return None;
- return TypeIndex::fromArrayIndex(N);
-}
-
-Optional<TypeIndex> TypeDatabase::getNext(TypeIndex Prev) {
- int N = ValidRecords.find_next(Prev.toArrayIndex());
- if (N == -1)
- return None;
- return TypeIndex::fromArrayIndex(N);
-}
diff --git a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp b/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp
deleted file mode 100644
index 8d97f8b1cb401..0000000000000
--- a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp
+++ /dev/null
@@ -1,330 +0,0 @@
-//===- TypeDatabaseVisitor.cpp -------------------------------- *- C++ --*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
-
-#include "llvm/ADT/SmallString.h"
-
-using namespace llvm;
-
-using namespace llvm::codeview;
-
-Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record) {
- assert(!IsInFieldList);
- // Reset Name to the empty string. If the visitor sets it, we know it.
- Name = "";
-
- if (Record.Type == LF_FIELDLIST) {
- // Record that we're in a field list so that members do not get assigned
- // type indices.
- IsInFieldList = true;
- }
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
- if (auto EC = visitTypeBegin(Record))
- return EC;
-
- CurrentTypeIndex = Index;
- return Error::success();
-}
-
-StringRef TypeDatabaseVisitor::getTypeName(TypeIndex Index) const {
- return TypeDB->getTypeName(Index);
-}
-
-StringRef TypeDatabaseVisitor::saveTypeName(StringRef Name) {
- return TypeDB->saveTypeName(Name);
-}
-
-Error TypeDatabaseVisitor::visitTypeEnd(CVType &CVR) {
- if (CVR.Type == LF_FIELDLIST) {
- assert(IsInFieldList);
- IsInFieldList = false;
- }
- assert(!IsInFieldList);
-
- // Record every type that is not a field list member, even if Name is empty.
- // CVUDTNames is indexed by type index, and must have one entry for every
- // type. Field list members are not recorded, and are only referenced by
- // their containing field list record.
- if (CurrentTypeIndex)
- TypeDB->recordType(Name, *CurrentTypeIndex, CVR);
- else
- TypeDB->appendType(Name, CVR);
-
- CurrentTypeIndex.reset();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitMemberBegin(CVMemberRecord &Record) {
- assert(IsInFieldList);
- // Reset Name to the empty string. If the visitor sets it, we know it.
- Name = "";
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitMemberEnd(CVMemberRecord &Record) {
- assert(IsInFieldList);
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- FieldListRecord &FieldList) {
- Name = "<field list>";
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
- StringIdRecord &String) {
- // Put this in the database so it gets printed with LF_UDT_SRC_LINE.
- Name = String.getString();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArgListRecord &Args) {
- auto Indices = Args.getIndices();
- uint32_t Size = Indices.size();
- SmallString<256> TypeName("(");
- for (uint32_t I = 0; I < Size; ++I) {
- StringRef ArgTypeName = getTypeName(Indices[I]);
- TypeName.append(ArgTypeName);
- if (I + 1 != Size)
- TypeName.append(", ");
- }
- TypeName.push_back(')');
- Name = saveTypeName(TypeName);
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- StringListRecord &Strings) {
- auto Indices = Strings.getIndices();
- uint32_t Size = Indices.size();
- SmallString<256> TypeName("\"");
- for (uint32_t I = 0; I < Size; ++I) {
- StringRef ArgTypeName = getTypeName(Indices[I]);
- TypeName.append(ArgTypeName);
- if (I + 1 != Size)
- TypeName.append("\" \"");
- }
- TypeName.push_back('\"');
- Name = saveTypeName(TypeName);
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ClassRecord &Class) {
- Name = Class.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, UnionRecord &Union) {
- Name = Union.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, EnumRecord &Enum) {
- Name = Enum.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArrayRecord &AT) {
- Name = AT.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) {
- Name = VFT.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- MemberFuncIdRecord &Id) {
- Name = Id.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- ProcedureRecord &Proc) {
- StringRef ReturnTypeName = getTypeName(Proc.getReturnType());
- StringRef ArgListTypeName = getTypeName(Proc.getArgumentList());
- SmallString<256> TypeName(ReturnTypeName);
- TypeName.push_back(' ');
- TypeName.append(ArgListTypeName);
- Name = saveTypeName(TypeName);
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- MemberFunctionRecord &MF) {
- StringRef ReturnTypeName = getTypeName(MF.getReturnType());
- StringRef ClassTypeName = getTypeName(MF.getClassType());
- StringRef ArgListTypeName = getTypeName(MF.getArgumentList());
- SmallString<256> TypeName(ReturnTypeName);
- TypeName.push_back(' ');
- TypeName.append(ClassTypeName);
- TypeName.append("::");
- TypeName.append(ArgListTypeName);
- Name = saveTypeName(TypeName);
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) {
- Name = Func.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- TypeServer2Record &TS) {
- Name = TS.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) {
-
- if (Ptr.isPointerToMember()) {
- const MemberPointerInfo &MI = Ptr.getMemberInfo();
-
- StringRef PointeeName = getTypeName(Ptr.getReferentType());
- StringRef ClassName = getTypeName(MI.getContainingType());
- SmallString<256> TypeName(PointeeName);
- TypeName.push_back(' ');
- TypeName.append(ClassName);
- TypeName.append("::*");
- Name = saveTypeName(TypeName);
- } else {
- SmallString<256> TypeName;
- if (Ptr.isConst())
- TypeName.append("const ");
- if (Ptr.isVolatile())
- TypeName.append("volatile ");
- if (Ptr.isUnaligned())
- TypeName.append("__unaligned ");
-
- TypeName.append(getTypeName(Ptr.getReferentType()));
-
- if (Ptr.getMode() == PointerMode::LValueReference)
- TypeName.append("&");
- else if (Ptr.getMode() == PointerMode::RValueReference)
- TypeName.append("&&");
- else if (Ptr.getMode() == PointerMode::Pointer)
- TypeName.append("*");
-
- if (!TypeName.empty())
- Name = saveTypeName(TypeName);
- }
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) {
- uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers());
-
- StringRef ModifiedName = getTypeName(Mod.getModifiedType());
- SmallString<256> TypeName;
- if (Mods & uint16_t(ModifierOptions::Const))
- TypeName.append("const ");
- if (Mods & uint16_t(ModifierOptions::Volatile))
- TypeName.append("volatile ");
- if (Mods & uint16_t(ModifierOptions::Unaligned))
- TypeName.append("__unaligned ");
- TypeName.append(ModifiedName);
- Name = saveTypeName(TypeName);
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- VFTableShapeRecord &Shape) {
- Name =
- saveTypeName("<vftable " + utostr(Shape.getEntryCount()) + " methods>");
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- NestedTypeRecord &Nested) {
- Name = Nested.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- OneMethodRecord &Method) {
- Name = Method.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- OverloadedMethodRecord &Method) {
- Name = Method.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- DataMemberRecord &Field) {
- Name = Field.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- StaticDataMemberRecord &Field) {
- Name = Field.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- EnumeratorRecord &Enum) {
- Name = Enum.getName();
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- BaseClassRecord &Base) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- VirtualBaseClassRecord &VBase) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- ListContinuationRecord &Cont) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(
- CVType &CVR, UdtModSourceLineRecord &ModSourceLine) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
- UdtSourceLineRecord &SourceLine) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(
- CVType &CVR, MethodOverloadListRecord &Overloads) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, LabelRecord &R) {
- return Error::success();
-}
-
-Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
- VFPtrRecord &VFP) {
- return Error::success();
-}
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 04b0384d81902..5899667050151 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -13,8 +13,6 @@
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/Formatters.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/BinaryByteStream.h"
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 8704cea607867..1226d5be3f3c4 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -357,6 +357,82 @@ static void discoverTypeIndices(ArrayRef<uint8_t> Content, TypeLeafKind Kind,
}
}
+static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
+ SmallVectorImpl<TiReference> &Refs) {
+ uint32_t Count;
+ // FIXME: In the future it would be nice if we could avoid hardcoding these
+ // values. One idea is to define some structures representing these types
+ // that would allow the use of offsetof().
+ switch (Kind) {
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_LPROC32_DPC:
+ case SymbolKind::S_LPROC32_DPC_ID:
+ Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID
+ break;
+ case SymbolKind::S_UDT:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT
+ break;
+ case SymbolKind::S_GDATA32:
+ case SymbolKind::S_LDATA32:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+ break;
+ case SymbolKind::S_BUILDINFO:
+ Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags
+ break;
+ case SymbolKind::S_LOCAL:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+ break;
+ case SymbolKind::S_CONSTANT:
+ Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
+ break;
+ case SymbolKind::S_REGREL32:
+ Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type
+ break;
+ case SymbolKind::S_CALLSITEINFO:
+ Refs.push_back({TiRefKind::TypeRef, 8, 1}); // Call signature
+ break;
+ case SymbolKind::S_CALLERS:
+ case SymbolKind::S_CALLEES:
+ // The record is a count followed by an array of type indices.
+ Count = *reinterpret_cast<const ulittle32_t *>(Content.data());
+ Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees
+ break;
+ case SymbolKind::S_INLINESITE:
+ Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee
+ break;
+
+ // Defranges don't have types, just registers and code offsets.
+ case SymbolKind::S_DEFRANGE_REGISTER:
+ case SymbolKind::S_DEFRANGE_REGISTER_REL:
+ case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL:
+ case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE:
+ case SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER:
+ case SymbolKind::S_DEFRANGE_SUBFIELD:
+ break;
+
+ // No type references.
+ case SymbolKind::S_LABEL32:
+ case SymbolKind::S_OBJNAME:
+ case SymbolKind::S_COMPILE:
+ case SymbolKind::S_COMPILE2:
+ case SymbolKind::S_COMPILE3:
+ case SymbolKind::S_BLOCK32:
+ case SymbolKind::S_FRAMEPROC:
+ break;
+ // Scope ending symbols.
+ case SymbolKind::S_END:
+ case SymbolKind::S_INLINESITE_END:
+ case SymbolKind::S_PROC_ID_END:
+ break;
+ default:
+ return false; // Unknown symbol.
+ }
+ return true;
+}
+
void llvm::codeview::discoverTypeIndices(const CVType &Type,
SmallVectorImpl<TiReference> &Refs) {
::discoverTypeIndices(Type.content(), Type.kind(), Refs);
@@ -369,3 +445,9 @@ void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
TypeLeafKind K = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs);
}
+
+bool llvm::codeview::discoverTypeIndices(const CVSymbol &Sym,
+ SmallVectorImpl<TiReference> &Refs) {
+ SymbolKind K = Sym.kind();
+ return ::discoverTypeIndices(Sym.content(), K, Refs);
+}
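
Each TiReference produced above describes a run of consecutive 32-bit indices at a fixed byte offset in the record's content, which is what lets consumers remap or collect indices without decoding the whole symbol. A sketch of walking those runs over a raw little-endian buffer (TiReference mirrors the struct used by this file; readLE32 is a local helper):

    #include <cstdint>
    #include <vector>

    enum class TiRefKind { TypeRef, IndexRef };
    struct TiReference {
      TiRefKind Kind;
      uint32_t Offset; // byte offset into the record's content
      uint32_t Count;  // number of consecutive 32-bit indices
    };

    static uint32_t readLE32(const uint8_t *P) {
      return uint32_t(P[0]) | uint32_t(P[1]) << 8 | uint32_t(P[2]) << 16 |
             uint32_t(P[3]) << 24;
    }

    // Collect every index a record references, given its discovered refs.
    static std::vector<uint32_t>
    collectIndices(const uint8_t *Content,
                   const std::vector<TiReference> &Refs) {
      std::vector<uint32_t> Indices;
      for (const TiReference &Ref : Refs)
        for (uint32_t I = 0; I < Ref.Count; ++I)
          Indices.push_back(readLE32(Content + Ref.Offset + 4 * I));
      return Indices;
    }
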
diff --git a/lib/DebugInfo/CodeView/TypeName.cpp b/lib/DebugInfo/CodeView/TypeName.cpp
new file mode 100644
index 0000000000000..2eb8b81862f3c
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeName.cpp
@@ -0,0 +1,243 @@
+//===- TypeName.cpp ------------------------------------------- *- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeName.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+namespace {
+class TypeNameComputer : public TypeVisitorCallbacks {
+ /// The type collection. Used to calculate names of nested types.
+ TypeCollection &Types;
+ TypeIndex CurrentTypeIndex = TypeIndex::None();
+
+ /// Name of the current type. Only valid before visitTypeEnd.
+ SmallString<256> Name;
+
+public:
+ explicit TypeNameComputer(TypeCollection &Types) : Types(Types) {}
+
+ StringRef name() const { return Name; }
+
+ /// Paired begin/end actions for all types. Receives all record data,
+ /// including the fixed-length record prefix.
+ Error visitTypeBegin(CVType &Record) override;
+ Error visitTypeBegin(CVType &Record, TypeIndex Index) override;
+ Error visitTypeEnd(CVType &Record) override;
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override;
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name)
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+};
+} // namespace
+
+Error TypeNameComputer::visitTypeBegin(CVType &Record) {
+ llvm_unreachable("Must call visitTypeBegin with a TypeIndex!");
+ return Error::success();
+}
+
+Error TypeNameComputer::visitTypeBegin(CVType &Record, TypeIndex Index) {
+ // Reset Name to the empty string. If the visitor sets it, we know it.
+ Name = "";
+ CurrentTypeIndex = Index;
+ return Error::success();
+}
+
+Error TypeNameComputer::visitTypeEnd(CVType &CVR) { return Error::success(); }
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+ FieldListRecord &FieldList) {
+ Name = "<field list>";
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ StringIdRecord &String) {
+ Name = String.getString();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) {
+ auto Indices = Args.getIndices();
+ uint32_t Size = Indices.size();
+ Name = "(";
+ for (uint32_t I = 0; I < Size; ++I) {
+ assert(Indices[I] < CurrentTypeIndex);
+
+ Name.append(Types.getTypeName(Indices[I]));
+ if (I + 1 != Size)
+ Name.append(", ");
+ }
+ Name.push_back(')');
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+ StringListRecord &Strings) {
+ auto Indices = Strings.getIndices();
+ uint32_t Size = Indices.size();
+ Name = "\"";
+ for (uint32_t I = 0; I < Size; ++I) {
+ Name.append(Types.getTypeName(Indices[I]));
+ if (I + 1 != Size)
+ Name.append("\" \"");
+ }
+ Name.push_back('\"');
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, ClassRecord &Class) {
+ Name = Class.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, UnionRecord &Union) {
+ Name = Union.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, EnumRecord &Enum) {
+ Name = Enum.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArrayRecord &AT) {
+ Name = AT.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) {
+ Name = VFT.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, MemberFuncIdRecord &Id) {
+ Name = Id.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, ProcedureRecord &Proc) {
+ StringRef Ret = Types.getTypeName(Proc.getReturnType());
+ StringRef Params = Types.getTypeName(Proc.getArgumentList());
+ Name = formatv("{0} {1}", Ret, Params).sstr<256>();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+ MemberFunctionRecord &MF) {
+ StringRef Ret = Types.getTypeName(MF.getReturnType());
+ StringRef Class = Types.getTypeName(MF.getClassType());
+ StringRef Params = Types.getTypeName(MF.getArgumentList());
+ Name = formatv("{0} {1}::{2}", Ret, Class, Params).sstr<256>();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) {
+ Name = Func.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) {
+ Name = TS.getName();
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) {
+
+ if (Ptr.isPointerToMember()) {
+ const MemberPointerInfo &MI = Ptr.getMemberInfo();
+
+ StringRef Pointee = Types.getTypeName(Ptr.getReferentType());
+ StringRef Class = Types.getTypeName(MI.getContainingType());
+ Name = formatv("{0} {1}::*", Pointee, Class);
+ } else {
+ if (Ptr.isConst())
+ Name.append("const ");
+ if (Ptr.isVolatile())
+ Name.append("volatile ");
+ if (Ptr.isUnaligned())
+ Name.append("__unaligned ");
+
+ Name.append(Types.getTypeName(Ptr.getReferentType()));
+
+ if (Ptr.getMode() == PointerMode::LValueReference)
+ Name.append("&");
+ else if (Ptr.getMode() == PointerMode::RValueReference)
+ Name.append("&&");
+ else if (Ptr.getMode() == PointerMode::Pointer)
+ Name.append("*");
+ }
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) {
+ uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers());
+
+ if (Mods & uint16_t(ModifierOptions::Const))
+ Name.append("const ");
+ if (Mods & uint16_t(ModifierOptions::Volatile))
+ Name.append("volatile ");
+ if (Mods & uint16_t(ModifierOptions::Unaligned))
+ Name.append("__unaligned ");
+ Name.append(Types.getTypeName(Mod.getModifiedType()));
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+ VFTableShapeRecord &Shape) {
+ Name = formatv("<vftable {0} methods>", Shape.getEntryCount());
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(
+ CVType &CVR, UdtModSourceLineRecord &ModSourceLine) {
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+ UdtSourceLineRecord &SourceLine) {
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) {
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR,
+ MethodOverloadListRecord &Overloads) {
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) {
+ return Error::success();
+}
+
+Error TypeNameComputer::visitKnownRecord(CVType &CVR, LabelRecord &R) {
+ return Error::success();
+}
+
+std::string llvm::codeview::computeTypeName(TypeCollection &Types,
+ TypeIndex Index) {
+ TypeNameComputer Computer(Types);
+ CVType Record = Types.getType(Index);
+ if (auto EC = visitTypeRecord(Record, Index, Computer)) {
+ consumeError(std::move(EC));
+ return "<unknown UDT>";
+ }
+ return Computer.name();
+}
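
Callers only ever see computeTypeName through a collection's getTypeName, which (per the LazyRandomTypeCollection change above) computes each name once and caches it. A hedged sketch of a caller, assuming an already-populated TypeCollection:

    #include "llvm/DebugInfo/CodeView/TypeCollection.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: dump every type name in a collection, in index order.
    void printAllNames(llvm::codeview::TypeCollection &Types,
                       llvm::raw_ostream &OS) {
      auto TI = Types.getFirst();
      while (TI) {
        OS << Types.getTypeName(*TI) << '\n'; // computed once, then cached
        TI = Types.getNext(*TI);
      }
    }
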
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index 8d974d522f283..4eca5aeaa0ae3 100644
--- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -10,7 +10,7 @@
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeName.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -18,14 +18,10 @@
using namespace llvm;
using namespace llvm::codeview;
-static void error(Error &&EC) {
- assert(!static_cast<bool>(EC));
- if (EC)
- consumeError(std::move(EC));
-}
-
TypeTableCollection::TypeTableCollection(ArrayRef<ArrayRef<uint8_t>> Records)
- : Records(Records), Database(Records.size()) {}
+ : NameStorage(Allocator), Records(Records) {
+ Names.resize(Records.size());
+}
Optional<TypeIndex> TypeTableCollection::getFirst() {
if (empty())
@@ -34,50 +30,38 @@ Optional<TypeIndex> TypeTableCollection::getFirst() {
}
Optional<TypeIndex> TypeTableCollection::getNext(TypeIndex Prev) {
+ assert(contains(Prev));
++Prev;
- assert(Prev.toArrayIndex() <= size());
if (Prev.toArrayIndex() == size())
return None;
return Prev;
}
-void TypeTableCollection::ensureTypeExists(TypeIndex Index) {
- assert(hasCapacityFor(Index));
-
- if (Database.contains(Index))
- return;
-
- BinaryByteStream Bytes(Records[Index.toArrayIndex()], support::little);
-
- CVType Type;
- uint32_t Len;
- VarStreamArrayExtractor<CVType> Extract;
- error(Extract(Bytes, Len, Type));
-
- TypeDatabaseVisitor DBV(Database);
- error(codeview::visitTypeRecord(Type, Index, DBV));
- assert(Database.contains(Index));
-}
-
CVType TypeTableCollection::getType(TypeIndex Index) {
- ensureTypeExists(Index);
- return Database.getTypeRecord(Index);
+ assert(Index.toArrayIndex() < Records.size());
+ ArrayRef<uint8_t> Bytes = Records[Index.toArrayIndex()];
+ const RecordPrefix *Prefix =
+ reinterpret_cast<const RecordPrefix *>(Bytes.data());
+ TypeLeafKind Kind = static_cast<TypeLeafKind>(uint16_t(Prefix->RecordKind));
+ return CVType(Kind, Bytes);
}
StringRef TypeTableCollection::getTypeName(TypeIndex Index) {
- if (!Index.isSimple())
- ensureTypeExists(Index);
- return Database.getTypeName(Index);
+ if (Index.isNoneType() || Index.isSimple())
+ return TypeIndex::simpleTypeName(Index);
+
+ uint32_t I = Index.toArrayIndex();
+ if (Names[I].data() == nullptr) {
+ StringRef Result = NameStorage.save(computeTypeName(*this, Index));
+ Names[I] = Result;
+ }
+ return Names[I];
}
bool TypeTableCollection::contains(TypeIndex Index) {
- return Database.contains(Index);
+ return Index.toArrayIndex() < size();
}
uint32_t TypeTableCollection::size() { return Records.size(); }
uint32_t TypeTableCollection::capacity() { return Records.size(); }
-
-bool TypeTableCollection::hasCapacityFor(TypeIndex Index) const {
- return Index.toArrayIndex() < Records.size();
-}
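
The new getType depends on every CodeView record beginning with a RecordPrefix: a 16-bit length followed by a 16-bit leaf kind. A standalone sketch of recovering the kind from raw record bytes (the struct is a local mirror of the one in CodeView's RecordSerialization.h, shown with plain types):

    #include <cstdint>
    #include <cstring>

    // Local mirror of RecordPrefix; both fields are little-endian on disk.
    struct RecordPrefix {
      uint16_t RecordLen;  // record length, not counting this field
      uint16_t RecordKind; // a TypeLeafKind value, e.g. LF_POINTER
    };

    // Pull the leaf kind out of a raw record without unaligned access.
    static uint16_t recordKind(const uint8_t *Bytes) {
      RecordPrefix P;
      std::memcpy(&P, Bytes, sizeof(P));
      return P.RecordKind;
    }
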
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index 57eac91f8c192..bb475a669efb2 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -65,46 +65,52 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data,
if (A && F) {
Optional<int64_t> V;
bool IsImplicitConst = (F == DW_FORM_implicit_const);
- if (IsImplicitConst)
+ if (IsImplicitConst) {
V = Data.getSLEB128(OffsetPtr);
- else if (auto Size = DWARFFormValue::getFixedByteSize(F))
- V = *Size;
- AttributeSpecs.push_back(AttributeSpec(A, F, V));
- if (IsImplicitConst)
+ AttributeSpecs.push_back(AttributeSpec(A, F, V));
continue;
+ }
// If this abbreviation still has a fixed byte size, then update the
// FixedAttributeSize as needed.
- if (FixedAttributeSize) {
- if (V)
- FixedAttributeSize->NumBytes += *V;
- else {
- switch (F) {
- case DW_FORM_addr:
- ++FixedAttributeSize->NumAddrs;
- break;
-
- case DW_FORM_ref_addr:
- ++FixedAttributeSize->NumRefAddrs;
- break;
-
- case DW_FORM_strp:
- case DW_FORM_GNU_ref_alt:
- case DW_FORM_GNU_strp_alt:
- case DW_FORM_line_strp:
- case DW_FORM_sec_offset:
- case DW_FORM_strp_sup:
- ++FixedAttributeSize->NumDwarfOffsets;
- break;
-
- default:
- // Indicate we no longer have a fixed byte size for this
- // abbreviation by clearing the FixedAttributeSize optional value
- // so it doesn't have a value.
- FixedAttributeSize.reset();
- break;
- }
+ switch (F) {
+ case DW_FORM_addr:
+ if (FixedAttributeSize)
+ ++FixedAttributeSize->NumAddrs;
+ break;
+
+ case DW_FORM_ref_addr:
+ if (FixedAttributeSize)
+ ++FixedAttributeSize->NumRefAddrs;
+ break;
+
+ case DW_FORM_strp:
+ case DW_FORM_GNU_ref_alt:
+ case DW_FORM_GNU_strp_alt:
+ case DW_FORM_line_strp:
+ case DW_FORM_sec_offset:
+ case DW_FORM_strp_sup:
+ if (FixedAttributeSize)
+ ++FixedAttributeSize->NumDwarfOffsets;
+ break;
+
+ default:
+ // The form has a byte size that doesn't depend on Params.
+ // If it's a fixed size, keep track of it.
+ if (auto Size =
+ DWARFFormValue::getFixedByteSize(F, DWARFFormParams())) {
+ V = *Size;
+ if (FixedAttributeSize)
+ FixedAttributeSize->NumBytes += *V;
+ break;
}
+ // Indicate we no longer have a fixed byte size for this
+ // abbreviation by clearing the FixedAttributeSize optional value
+ // so it doesn't have a value.
+ FixedAttributeSize.reset();
+ break;
}
+ // Record this attribute and its fixed size if it has one.
+ AttributeSpecs.push_back(AttributeSpec(A, F, V));
} else if (A == 0 && F == 0) {
// We successfully reached the end of this abbreviation declaration
// since both attribute and form are zero.
@@ -186,7 +192,8 @@ Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
if (auto FixedSize = Spec.getByteSize(U))
Offset += *FixedSize;
else
- DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, &U);
+ DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset,
+ U.getFormParams());
++AttrIndex;
}
return None;
@@ -211,7 +218,8 @@ Optional<int64_t> DWARFAbbreviationDeclaration::AttributeSpec::getByteSize(
if (ByteSizeOrValue)
return ByteSizeOrValue;
Optional<int64_t> S;
- auto FixedByteSize = DWARFFormValue::getFixedByteSize(Form, &U);
+ auto FixedByteSize =
+ DWARFFormValue::getFixedByteSize(Form, U.getFormParams());
if (FixedByteSize)
S = *FixedByteSize;
return S;
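
The switch above counts addr, ref_addr, and offset forms separately instead of summing bytes because those three widths are per-unit: address size comes from the unit header, and the other two depend on DWARF32 vs. DWARF64 (with ref_addr additionally address-sized in DWARF v2, as the FormSizeHelper removed below also shows). A sketch of the final size computation under those rules:

    #include <cstdint>

    struct FixedSizeInfo {
      uint16_t NumBytes = 0;       // forms whose size never varies
      uint8_t NumAddrs = 0;        // DW_FORM_addr occurrences
      uint8_t NumRefAddrs = 0;     // DW_FORM_ref_addr occurrences
      uint8_t NumDwarfOffsets = 0; // strp/sec_offset/... occurrences
    };

    // Total fixed size of one DIE's attribute values for a given unit.
    static uint32_t getByteSize(const FixedSizeInfo &FSI, uint8_t AddrSize,
                                uint8_t OffsetSize, uint16_t Version) {
      // DWARF v2 made ref_addr address-sized; later versions use the
      // section offset size (4 for DWARF32, 8 for DWARF64).
      uint8_t RefAddrSize = (Version == 2) ? AddrSize : OffsetSize;
      return FSI.NumBytes + FSI.NumAddrs * AddrSize +
             FSI.NumRefAddrs * RefAddrSize +
             FSI.NumDwarfOffsets * OffsetSize;
    }
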
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 9bafcde57f0ae..3814794617503 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
@@ -36,7 +37,6 @@
#include "llvm/Object/RelocVisitor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -44,8 +44,8 @@
#include <algorithm>
#include <cstdint>
#include <map>
-#include <set>
#include <string>
+#include <tuple>
#include <utility>
#include <vector>
@@ -55,9 +55,9 @@ using namespace object;
#define DEBUG_TYPE "dwarf"
-typedef DWARFDebugLine::LineTable DWARFLineTable;
-typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
-typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
+using DWARFLineTable = DWARFDebugLine::LineTable;
+using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
+using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size,
uint32_t *Off, const RelocAddrMap *Relocs,
@@ -201,8 +201,7 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
}
}
-void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){
-
+void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
DIDumpType DumpType = DumpOpts.DumpType;
bool DumpEH = DumpOpts.DumpEH;
bool SummarizeTypes = DumpOpts.SummarizeTypes;
@@ -1068,7 +1067,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
errs() << "error: failed to compute relocation: " << Name << "\n";
continue;
}
- llvm::RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val};
+ RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val};
Map->insert({Reloc.getOffset(), Rel});
}
}
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index cf9fec2b3254c..475cf25b781b4 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
@@ -71,7 +70,7 @@ protected:
/// An entry may contain CFI instructions. An instruction consists of an
/// opcode and an optional sequence of operands.
- typedef std::vector<uint64_t> Operands;
+ using Operands = std::vector<uint64_t>;
struct Instruction {
Instruction(uint8_t Opcode)
: Opcode(Opcode)
@@ -518,14 +517,13 @@ static uint64_t readPointer(const DataExtractor &Data, uint32_t &Offset,
// noreturn attribute usage in lambdas. Once support for those
// compilers is phased out, we can remove this and go back to
// a ReportError lambda: [StartOffset](const char *ErrorMsg).
-#define ReportError(ErrorMsg) ReportErrorImpl(StartOffset,ErrorMsg)
-static void LLVM_ATTRIBUTE_NORETURN
-ReportErrorImpl(uint32_t StartOffset, const char *ErrorMsg) {
- std::string Str;
- raw_string_ostream OS(Str);
- OS << format(ErrorMsg, StartOffset);
- OS.flush();
- report_fatal_error(Str);
+static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset,
+ const char *ErrorMsg) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << format(ErrorMsg, StartOffset);
+ OS.flush();
+ report_fatal_error(Str);
}
void DWARFDebugFrame::parse(DataExtractor Data) {
@@ -590,13 +588,15 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
switch (AugmentationString[i]) {
default:
- ReportError("Unknown augmentation character in entry at %lx");
+ ReportError(StartOffset,
+ "Unknown augmentation character in entry at %lx");
case 'L':
LSDAPointerEncoding = Data.getU8(&Offset);
break;
case 'P': {
if (Personality)
- ReportError("Duplicate personality in entry at %lx");
+ ReportError(StartOffset,
+ "Duplicate personality in entry at %lx");
PersonalityEncoding = Data.getU8(&Offset);
Personality = readPointer(Data, Offset, *PersonalityEncoding);
break;
@@ -606,7 +606,8 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
break;
case 'z':
if (i)
- ReportError("'z' must be the first character at %lx");
+ ReportError(StartOffset,
+ "'z' must be the first character at %lx");
// Parse the augmentation length first. We only parse it if
// the string contains a 'z'.
AugmentationLength = Data.getULEB128(&Offset);
@@ -618,7 +619,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
if (AugmentationLength.hasValue()) {
if (Offset != EndAugmentationOffset)
- ReportError("Parsing augmentation data at %lx failed");
+ ReportError(StartOffset, "Parsing augmentation data at %lx failed");
AugmentationData = Data.getData().slice(StartAugmentationOffset,
EndAugmentationOffset);
@@ -645,7 +646,8 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
if (IsEH) {
// The address size is encoded in the CIE we reference.
if (!Cie)
- ReportError("Parsing FDE data at %lx failed due to missing CIE");
+ ReportError(StartOffset,
+ "Parsing FDE data at %lx failed due to missing CIE");
InitialLocation = readPointer(Data, Offset,
Cie->getFDEPointerEncoding());
@@ -665,7 +667,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
readPointer(Data, Offset, Cie->getLSDAPointerEncoding());
if (Offset != EndAugmentationOffset)
- ReportError("Parsing augmentation data at %lx failed");
+ ReportError(StartOffset, "Parsing augmentation data at %lx failed");
}
} else {
InitialLocation = Data.getAddress(&Offset);
@@ -680,7 +682,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
Entries.back()->parseInstructions(Data, &Offset, EndStructureOffset);
if (Offset != EndStructureOffset)
- ReportError("Parsing entry instructions at %lx failed");
+ ReportError(StartOffset, "Parsing entry instructions at %lx failed");
}
}
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index dbcc64fc0832f..1551974b822ac 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -59,7 +59,7 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
// Attribute byte size is fixed; just add the size to the offset.
*OffsetPtr += *FixedSize;
} else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData,
- OffsetPtr, &U)) {
+ OffsetPtr, U.getFormParams())) {
// We failed to skip this attribute's value, restore the original offset
// and return the failure status.
*OffsetPtr = Offset;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index cda3e75fbc3e7..ad5647f3e03d8 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -9,6 +9,8 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
@@ -26,23 +28,27 @@
using namespace llvm;
using namespace dwarf;
-typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
+
namespace {
+
struct ContentDescriptor {
dwarf::LineNumberEntryFormat Type;
dwarf::Form Form;
};
-typedef SmallVector<ContentDescriptor, 4> ContentDescriptors;
+
+using ContentDescriptors = SmallVector<ContentDescriptor, 4>;
+
} // end anonymous namespace
DWARFDebugLine::Prologue::Prologue() { clear(); }
void DWARFDebugLine::Prologue::clear() {
- TotalLength = Version = PrologueLength = 0;
- AddressSize = SegSelectorSize = 0;
+ TotalLength = PrologueLength = 0;
+ SegSelectorSize = 0;
MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0;
OpcodeBase = 0;
- IsDWARF64 = false;
+ FormParams = DWARFFormParams({0, 0, DWARF32});
StandardOpcodeLengths.clear();
IncludeDirectories.clear();
FileNames.clear();
@@ -51,12 +57,13 @@ void DWARFDebugLine::Prologue::clear() {
void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
OS << "Line table prologue:\n"
<< format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength)
- << format(" version: %u\n", Version)
- << format(Version >= 5 ? " address_size: %u\n" : "", AddressSize)
- << format(Version >= 5 ? " seg_select_size: %u\n" : "", SegSelectorSize)
- << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength)
+ << format(" version: %u\n", getVersion());
+ if (getVersion() >= 5)
+ OS << format(" address_size: %u\n", getAddressSize())
+ << format(" seg_select_size: %u\n", SegSelectorSize);
+ OS << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength)
<< format(" min_inst_length: %u\n", MinInstLength)
- << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst)
+ << format(getVersion() >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst)
<< format(" default_is_stmt: %u\n", DefaultIsStmt)
<< format(" line_base: %i\n", LineBase)
<< format(" line_range: %u\n", LineRange)
@@ -137,6 +144,7 @@ parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr,
static bool
parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
uint64_t EndPrologueOffset,
+ const DWARFFormParams &FormParams,
std::vector<StringRef> &IncludeDirectories,
std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
// Get the directory entry description.
@@ -159,7 +167,7 @@ parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
IncludeDirectories.push_back(Value.getAsCString().getValue());
break;
default:
- if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr))
+ if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams))
return false;
}
}
@@ -211,24 +219,26 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
clear();
TotalLength = DebugLineData.getU32(OffsetPtr);
if (TotalLength == UINT32_MAX) {
- IsDWARF64 = true;
+ FormParams.Format = dwarf::DWARF64;
TotalLength = DebugLineData.getU64(OffsetPtr);
- } else if (TotalLength > 0xffffff00) {
+ } else if (TotalLength >= 0xffffff00) {
return false;
}
- Version = DebugLineData.getU16(OffsetPtr);
- if (Version < 2)
+ FormParams.Version = DebugLineData.getU16(OffsetPtr);
+ if (getVersion() < 2)
return false;
- if (Version >= 5) {
- AddressSize = DebugLineData.getU8(OffsetPtr);
+ if (getVersion() >= 5) {
+ FormParams.AddrSize = DebugLineData.getU8(OffsetPtr);
+ assert(getAddressSize() == DebugLineData.getAddressSize() &&
+ "Line table header and data extractor disagree");
SegSelectorSize = DebugLineData.getU8(OffsetPtr);
}
PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength());
const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr;
MinInstLength = DebugLineData.getU8(OffsetPtr);
- if (Version >= 4)
+ if (getVersion() >= 4)
MaxOpsPerInst = DebugLineData.getU8(OffsetPtr);
DefaultIsStmt = DebugLineData.getU8(OffsetPtr);
LineBase = DebugLineData.getU8(OffsetPtr);
@@ -241,9 +251,9 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
StandardOpcodeLengths.push_back(OpLen);
}
- if (Version >= 5) {
+ if (getVersion() >= 5) {
if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
- IncludeDirectories, FileNames)) {
+ getFormParams(), IncludeDirectories, FileNames)) {
fprintf(stderr,
"warning: parsing line table prologue at 0x%8.8" PRIx64
" found an invalid directory or file table description at"
@@ -333,7 +343,7 @@ void DWARFDebugLine::LineTable::clear() {
}
DWARFDebugLine::ParsingState::ParsingState(struct LineTable *LT)
- : LineTable(LT), RowNumber(0) {
+ : LineTable(LT) {
resetRowAndSequence();
}
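
The TotalLength logic in Prologue::parse implements DWARF's initial-length encoding: 0xffffffff escapes to a 64-bit length (DWARF64), and values just below it are reserved and rejected; this parser is stricter than the spec's 0xfffffff0 floor and refuses anything >= 0xffffff00. A sketch of the rule in isolation:

    #include <cstdint>
    #include <optional>

    enum class DwarfFormat { DWARF32, DWARF64 };

    struct InitialLength {
      uint64_t Length;
      DwarfFormat Format;
    };

    // First32 is the leading 4-byte word; Next64 is the 8-byte length a
    // reader consumes after seeing the DWARF64 escape value.
    static std::optional<InitialLength> parseInitialLength(uint32_t First32,
                                                           uint64_t Next64) {
      if (First32 == UINT32_MAX)
        return InitialLength{Next64, DwarfFormat::DWARF64};
      if (First32 >= 0xffffff00) // reserved range, reject
        return std::nullopt;
      return InitialLength{First32, DwarfFormat::DWARF32};
    }
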
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index ed1f5f46dcfb8..861114bde1f2b 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -59,48 +59,13 @@ static const DWARFFormValue::FormClass DWARF4FormClasses[] = {
DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present
};
-namespace {
-
-/// A helper class that can be used in DWARFFormValue.cpp functions that need
-/// to know the byte size of DW_FORM values that vary in size depending on the
-/// DWARF version, address byte size, or DWARF32 or DWARF64.
-class FormSizeHelper {
- uint16_t Version;
- uint8_t AddrSize;
- llvm::dwarf::DwarfFormat Format;
-
-public:
- FormSizeHelper(uint16_t V, uint8_t A, llvm::dwarf::DwarfFormat F)
- : Version(V), AddrSize(A), Format(F) {}
-
- uint8_t getAddressByteSize() const { return AddrSize; }
-
- uint8_t getRefAddrByteSize() const {
- if (Version == 2)
- return AddrSize;
- return getDwarfOffsetByteSize();
- }
-
- uint8_t getDwarfOffsetByteSize() const {
- switch (Format) {
- case dwarf::DwarfFormat::DWARF32:
- return 4;
- case dwarf::DwarfFormat::DWARF64:
- return 8;
- }
- llvm_unreachable("Invalid Format value");
- }
-};
-
-} // end anonymous namespace
-
-template <class T>
-static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
+Optional<uint8_t>
+DWARFFormValue::getFixedByteSize(dwarf::Form Form,
+ const DWARFFormParams Params) {
switch (Form) {
case DW_FORM_addr:
- if (U)
- return U->getAddressByteSize();
- return None;
+ assert(Params.Version && Params.AddrSize && "Invalid Params for form");
+ return Params.AddrSize;
case DW_FORM_block: // ULEB128 length L followed by L bytes.
case DW_FORM_block1: // 1 byte length L followed by L bytes.
@@ -121,9 +86,8 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
return None;
case DW_FORM_ref_addr:
- if (U)
- return U->getRefAddrByteSize();
- return None;
+ assert(Params.Version && Params.AddrSize && "Invalid Params for form");
+ return Params.getRefAddrByteSize();
case DW_FORM_flag:
case DW_FORM_data1:
@@ -138,6 +102,9 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
case DW_FORM_addrx2:
return 2;
+ case DW_FORM_strx3:
+ return 3;
+
case DW_FORM_data4:
case DW_FORM_ref4:
case DW_FORM_ref_sup4:
@@ -151,9 +118,8 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
case DW_FORM_line_strp:
case DW_FORM_sec_offset:
case DW_FORM_strp_sup:
- if (U)
- return U->getDwarfOffsetByteSize();
- return None;
+ assert(Params.Version && Params.AddrSize && "Invalid Params for form");
+ return Params.getDwarfOffsetByteSize();
case DW_FORM_data8:
case DW_FORM_ref8:
@@ -178,9 +144,9 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
return None;
}
-template <class T>
-static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData,
- uint32_t *OffsetPtr, const T *U) {
+bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
+ uint32_t *OffsetPtr,
+ const DWARFFormParams Params) {
bool Indirect = false;
do {
switch (Form) {
@@ -240,7 +206,8 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData,
case DW_FORM_line_strp:
case DW_FORM_GNU_ref_alt:
case DW_FORM_GNU_strp_alt:
- if (Optional<uint8_t> FixedSize = ::getFixedByteSize(Form, U)) {
+ if (Optional<uint8_t> FixedSize =
+ DWARFFormValue::getFixedByteSize(Form, Params)) {
*OffsetPtr += *FixedSize;
return true;
}
@@ -274,19 +241,6 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData,
return true;
}
-Optional<uint8_t> DWARFFormValue::getFixedByteSize(dwarf::Form Form,
- const DWARFUnit *U) {
- return ::getFixedByteSize(Form, U);
-}
-
-Optional<uint8_t>
-DWARFFormValue::getFixedByteSize(dwarf::Form Form, uint16_t Version,
- uint8_t AddrSize,
- llvm::dwarf::DwarfFormat Format) {
- FormSizeHelper FSH(Version, AddrSize, Format);
- return ::getFixedByteSize(Form, &FSH);
-}
-
bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
// First, check DWARF4 form classes.
if (Form < makeArrayRef(DWARF4FormClasses).size() &&
@@ -302,6 +256,10 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
case DW_FORM_GNU_str_index:
case DW_FORM_GNU_strp_alt:
case DW_FORM_strx:
+ case DW_FORM_strx1:
+ case DW_FORM_strx2:
+ case DW_FORM_strx3:
+ case DW_FORM_strx4:
return (FC == FC_String);
case DW_FORM_implicit_const:
return (FC == FC_Constant);
@@ -368,6 +326,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
case DW_FORM_addrx2:
Value.uval = Data.getU16(OffsetPtr);
break;
+ case DW_FORM_strx3:
+ Value.uval = Data.getU24(OffsetPtr);
+ break;
case DW_FORM_data4:
case DW_FORM_ref4:
case DW_FORM_ref_sup4:
@@ -438,24 +399,6 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
return true;
}
-bool DWARFFormValue::skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr,
- const DWARFUnit *U) const {
- return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U);
-}
-
-bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
- uint32_t *OffsetPtr, const DWARFUnit *U) {
- return skipFormValue(Form, DebugInfoData, OffsetPtr, U);
-}
-
-bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
- uint32_t *OffsetPtr, uint16_t Version,
- uint8_t AddrSize,
- llvm::dwarf::DwarfFormat Format) {
- FormSizeHelper FSH(Version, AddrSize, Format);
- return skipFormValue(Form, DebugInfoData, OffsetPtr, &FSH);
-}
-
void DWARFFormValue::dump(raw_ostream &OS) const {
uint64_t UValue = Value.uval;
bool CURelativeOffset = false;
@@ -545,6 +488,10 @@ void DWARFFormValue::dump(raw_ostream &OS) const {
dumpString(OS);
break;
case DW_FORM_strx:
+ case DW_FORM_strx1:
+ case DW_FORM_strx2:
+ case DW_FORM_strx3:
+ case DW_FORM_strx4:
case DW_FORM_GNU_str_index:
OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue);
dumpString(OS);
@@ -623,7 +570,9 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
if (Form == DW_FORM_GNU_strp_alt || U == nullptr)
return None;
uint32_t Offset = Value.uval;
- if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx) {
+ if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx ||
+ Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 ||
+ Form == DW_FORM_strx4) {
uint64_t StrOffset;
if (!U->getStringOffsetSectionItem(Offset, StrOffset))
return None;
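The refactor above drops the templated FormSizeHelper indirection in favor of a plain value type supplied by callers. A minimal sketch of the DWARFFormParams shape the new code assumes (field order inferred from DWARFFormParams({0, 0, DWARF32}) in the DWARFUnit.cpp hunks below; the exact header definition is an assumption):

    struct DWARFFormParams {
      uint16_t Version = 0;
      uint8_t AddrSize = 0;
      dwarf::DwarfFormat Format = dwarf::DwarfFormat::DWARF32;

      // DW_FORM_ref_addr is address-sized in DWARF2, offset-sized afterwards.
      uint8_t getRefAddrByteSize() const {
        return Version == 2 ? AddrSize : getDwarfOffsetByteSize();
      }
      // DWARF32 offsets are 4 bytes, DWARF64 offsets are 8 bytes.
      uint8_t getDwarfOffsetByteSize() const {
        return Format == dwarf::DwarfFormat::DWARF64 ? 8 : 4;
      }
    };

The two helpers mirror the logic of the deleted FormSizeHelper, so the byte-size answers stay identical across the refactor.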
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 09e6a292e5fe1..fd9c7c2b1d46c 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFUnit.cpp -----------------------------------------------------===//
+//===- DWARFUnit.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
@@ -17,8 +16,6 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Path.h"
#include <algorithm>
@@ -26,6 +23,7 @@
#include <cstddef>
#include <cstdint>
#include <cstdio>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -55,8 +53,8 @@ DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
const DWARFUnitIndex::Entry *IndexEntry)
: Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
LineSection(LS), StringSection(SS), StringOffsetSection(SOS),
- StringOffsetSectionBase(0), AddrOffsetSection(AOS), isLittleEndian(LE),
- isDWO(IsDWO), UnitSection(UnitSection), IndexEntry(IndexEntry) {
+ AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO),
+ UnitSection(UnitSection), IndexEntry(IndexEntry) {
clear();
}
@@ -64,11 +62,13 @@ DWARFUnit::~DWARFUnit() = default;
bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
uint64_t &Result) const {
- uint32_t Offset = AddrOffsetSectionBase + Index * AddrSize;
- if (AddrOffsetSection->Data.size() < Offset + AddrSize)
+ uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
+ if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
return false;
- DataExtractor DA(AddrOffsetSection->Data, isLittleEndian, AddrSize);
- Result = getRelocatedValue(DA, AddrSize, &Offset, &AddrOffsetSection->Relocs);
+ DataExtractor DA(AddrOffsetSection->Data, isLittleEndian,
+ getAddressByteSize());
+ Result = getRelocatedValue(DA, getAddressByteSize(), &Offset,
+ &AddrOffsetSection->Relocs);
return true;
}
@@ -94,15 +94,17 @@ uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const {
bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
Length = debug_info.getU32(offset_ptr);
- Version = debug_info.getU16(offset_ptr);
+ // FIXME: Support DWARF64.
+ FormParams.Format = DWARF32;
+ FormParams.Version = debug_info.getU16(offset_ptr);
uint64_t AbbrOffset;
- if (Version >= 5) {
+ if (FormParams.Version >= 5) {
UnitType = debug_info.getU8(offset_ptr);
- AddrSize = debug_info.getU8(offset_ptr);
+ FormParams.AddrSize = debug_info.getU8(offset_ptr);
AbbrOffset = debug_info.getU32(offset_ptr);
} else {
AbbrOffset = debug_info.getU32(offset_ptr);
- AddrSize = debug_info.getU8(offset_ptr);
+ FormParams.AddrSize = debug_info.getU8(offset_ptr);
}
if (IndexEntry) {
if (AbbrOffset)
@@ -117,14 +119,14 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
}
bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
- bool VersionOK = DWARFContext::isSupportedVersion(Version);
- bool AddrSizeOK = AddrSize == 4 || AddrSize == 8;
+ bool VersionOK = DWARFContext::isSupportedVersion(getVersion());
+ bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8;
if (!LengthOK || !VersionOK || !AddrSizeOK)
return false;
// Keep track of the highest DWARF version we encounter across all units.
- Context.setMaxVersionIfGreater(Version);
+ Context.setMaxVersionIfGreater(getVersion());
Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset);
return Abbrevs != nullptr;
@@ -150,7 +152,8 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
assert(!DieArray.empty());
- DataExtractor RangesData(RangeSection->Data, isLittleEndian, AddrSize);
+ DataExtractor RangesData(RangeSection->Data, isLittleEndian,
+ getAddressByteSize());
uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
return RangeList.extract(RangesData, &ActualRangeListOffset,
RangeSection->Relocs);
@@ -159,9 +162,8 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
void DWARFUnit::clear() {
Offset = 0;
Length = 0;
- Version = 0;
Abbrevs = nullptr;
- AddrSize = 0;
+ FormParams = DWARFFormParams({0, 0, DWARF32});
BaseAddr = 0;
RangeSectionBase = 0;
AddrOffsetSectionBase = 0;
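DWARFUnit now stores the same triple once and derives everything through accessors; assumed accessor shape, consistent with the calls introduced above:

    uint16_t getVersion() const { return FormParams.Version; }
    uint8_t getAddressByteSize() const { return FormParams.AddrSize; }
    const DWARFFormParams &getFormParams() const { return FormParams; }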
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index a6240fb60143c..41907e5705637 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -279,7 +279,6 @@ bool DWARFVerifier::handleDebugLine() {
bool DWARFVerifier::handleAppleNames() {
NumAppleNamesErrors = 0;
- OS << "Verifying .apple_names...\n";
DataExtractor AppleNamesSection(DCtx.getAppleNamesSection().Data,
DCtx.isLittleEndian(), 0);
@@ -288,10 +287,11 @@ bool DWARFVerifier::handleAppleNames() {
DCtx.getAppleNamesSection().Relocs);
if (!AppleNames.extract()) {
- OS << "error: cannot extract .apple_names accelerator table\n";
- return false;
+ return true;
}
+ OS << "Verifying .apple_names...\n";
+
// Verify that all buckets have a valid hash index or are empty
uint32_t NumBuckets = AppleNames.getNumBuckets();
uint32_t NumHashes = AppleNames.getNumHashes();
diff --git a/lib/DebugInfo/MSF/MSFBuilder.cpp b/lib/DebugInfo/MSF/MSFBuilder.cpp
index 5b1b5d8dc4d55..0f4f785abf55a 100644
--- a/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -1,3 +1,4 @@
+//===- MSFBuilder.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,22 +7,30 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::msf;
using namespace llvm::support;
-namespace {
-const uint32_t kSuperBlockBlock = 0;
-const uint32_t kFreePageMap0Block = 1;
-const uint32_t kFreePageMap1Block = 2;
-const uint32_t kNumReservedPages = 3;
+static const uint32_t kSuperBlockBlock = 0;
+static const uint32_t kFreePageMap0Block = 1;
+static const uint32_t kFreePageMap1Block = 2;
+static const uint32_t kNumReservedPages = 3;
-const uint32_t kDefaultFreePageMap = kFreePageMap0Block;
-const uint32_t kDefaultBlockMapAddr = kNumReservedPages;
-}
+static const uint32_t kDefaultFreePageMap = kFreePageMap0Block;
+static const uint32_t kDefaultBlockMapAddr = kNumReservedPages;
MSFBuilder::MSFBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow,
BumpPtrAllocator &Allocator)
@@ -263,7 +272,7 @@ Expected<MSFLayout> MSFBuilder::build() {
// The stream sizes should be re-allocated as a stable pointer and the stream
// map should have each of its entries allocated as a separate stable pointer.
- if (StreamData.size() > 0) {
+ if (!StreamData.empty()) {
ulittle32_t *Sizes = Allocator.Allocate<ulittle32_t>(StreamData.size());
L.StreamSizes = ArrayRef<ulittle32_t>(Sizes, StreamData.size());
L.StreamMap.resize(StreamData.size());
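Moving the constants from an anonymous namespace to file-level static keeps their internal linkage; the values themselves encode the fixed MSF preamble:

    // block 0 -> super block        (kSuperBlockBlock)
    // block 1 -> free page map 0    (kFreePageMap0Block)
    // block 2 -> free page map 1    (kFreePageMap1Block)
    // hence the first kNumReservedPages (3) blocks never hold stream data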
diff --git a/lib/DebugInfo/MSF/MSFCommon.cpp b/lib/DebugInfo/MSF/MSFCommon.cpp
index fdab7884646ec..1facf5efb4bbb 100644
--- a/lib/DebugInfo/MSF/MSFCommon.cpp
+++ b/lib/DebugInfo/MSF/MSFCommon.cpp
@@ -1,4 +1,4 @@
-//===- MSFCommon.cpp - Common types and functions for MSF files -*- C++ -*-===//
+//===- MSFCommon.cpp - Common types and functions for MSF files -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,6 +9,10 @@
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <cstring>
using namespace llvm;
using namespace llvm::msf;
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index faf2442bc94bb..e45f4ae0ed940 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -8,23 +8,33 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-
-#include "llvm/DebugInfo/MSF/IMSFFile.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MSFStreamLayout.h"
-#include "llvm/Support/BinaryStreamError.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::msf;
namespace {
+
template <typename Base> class MappedBlockStreamImpl : public Base {
public:
template <typename... Args>
MappedBlockStreamImpl(Args &&... Params)
: Base(std::forward<Args>(Params)...) {}
};
-}
+
+} // end anonymous namespace
static void initializeFpmStreamLayout(const MSFLayout &Layout,
MSFStreamLayout &FpmLayout) {
@@ -39,7 +49,8 @@ static void initializeFpmStreamLayout(const MSFLayout &Layout,
FpmLayout.Length = msf::getFullFpmByteSize(Layout);
}
-typedef std::pair<uint32_t, uint32_t> Interval;
+using Interval = std::pair<uint32_t, uint32_t>;
+
static Interval intersect(const Interval &I1, const Interval &I2) {
return std::make_pair(std::max(I1.first, I2.first),
std::min(I1.second, I2.second));
@@ -214,7 +225,7 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
uint32_t OffsetInBlock = Offset % BlockSize;
uint32_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock);
uint32_t NumAdditionalBlocks =
- llvm::alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize;
+ alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize;
uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
uint32_t E = StreamLayout.Blocks[BlockNum];
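A short worked example of the intersect helper above (values illustrative):

    Interval A{10, 20}, B{15, 30};
    Interval C = intersect(A, B);           // C == {15, 20}
    Interval D = intersect({0, 5}, {7, 9}); // D == {7, 5}; first > second
                                            // signals an empty intersection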
diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp
index ef47b92b4f2f3..ef9390cda3127 100644
--- a/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -151,7 +151,7 @@ void DIASession::setLoadAddress(uint64_t Address) {
Session->put_loadAddress(Address);
}
-std::unique_ptr<PDBSymbolExe> DIASession::getGlobalScope() const {
+std::unique_ptr<PDBSymbolExe> DIASession::getGlobalScope() {
CComPtr<IDiaSymbol> GlobalScope;
if (S_OK != Session->get_globalScope(&GlobalScope))
return nullptr;
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 81a9d3eeec619..745dd742aadc3 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -51,6 +51,7 @@ DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName,
uint32_t ModIndex,
msf::MSFBuilder &Msf)
: MSF(Msf), ModuleName(ModuleName) {
+ ::memset(&Layout, 0, sizeof(Layout));
Layout.Mod = ModIndex;
}
@@ -102,6 +103,7 @@ template <typename T> struct Foo {
template <typename T> Foo<T> makeFoo(T &&t) { return Foo<T>(std::move(t)); }
void DbiModuleDescriptorBuilder::finalize() {
+ Layout.SC.ModuleIndex = Layout.Mod;
Layout.FileNameOffs = 0; // TODO: Fix this
Layout.Flags = 0; // TODO: Fix this
Layout.C11Bytes = 0;
@@ -182,3 +184,9 @@ void DbiModuleDescriptorBuilder::addDebugSubsection(
C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>(
std::move(Subsection), CodeViewContainer::Pdb));
}
+
+void DbiModuleDescriptorBuilder::addDebugSubsection(
+ const DebugSubsectionRecord &SubsectionContents) {
+ C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>(
+ SubsectionContents, CodeViewContainer::Pdb));
+}
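The memset zeroes every field of the module layout before the selective assignments that follow; assuming the layout type is a trivially-copyable aggregate, value-initialization would be equivalent:

    Layout = {}; // assumed equivalent to ::memset(&Layout, 0, sizeof(Layout))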
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 24322d942facc..a1f0671dec3e6 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -99,29 +99,27 @@ Error DbiStream::reload() {
return make_error<RawError>(raw_error_code::corrupt_file,
"DBI type server substream not aligned.");
- BinaryStreamRef ModInfoSubstream;
- BinaryStreamRef FileInfoSubstream;
- if (auto EC =
- Reader.readStreamRef(ModInfoSubstream, Header->ModiSubstreamSize))
+ if (auto EC = Reader.readSubstream(ModiSubstream, Header->ModiSubstreamSize))
return EC;
- if (auto EC = Reader.readStreamRef(SecContrSubstream,
+ if (auto EC = Reader.readSubstream(SecContrSubstream,
Header->SecContrSubstreamSize))
return EC;
- if (auto EC = Reader.readStreamRef(SecMapSubstream, Header->SectionMapSize))
+ if (auto EC = Reader.readSubstream(SecMapSubstream, Header->SectionMapSize))
return EC;
- if (auto EC = Reader.readStreamRef(FileInfoSubstream, Header->FileInfoSize))
+ if (auto EC = Reader.readSubstream(FileInfoSubstream, Header->FileInfoSize))
return EC;
if (auto EC =
- Reader.readStreamRef(TypeServerMapSubstream, Header->TypeServerSize))
+ Reader.readSubstream(TypeServerMapSubstream, Header->TypeServerSize))
return EC;
- if (auto EC = Reader.readStreamRef(ECSubstream, Header->ECSubstreamSize))
+ if (auto EC = Reader.readSubstream(ECSubstream, Header->ECSubstreamSize))
return EC;
if (auto EC = Reader.readArray(
DbgStreams, Header->OptionalDbgHdrSize / sizeof(ulittle16_t)))
return EC;
- if (auto EC = Modules.initialize(ModInfoSubstream, FileInfoSubstream))
+ if (auto EC = Modules.initialize(ModiSubstream.StreamData,
+ FileInfoSubstream.StreamData))
return EC;
if (auto EC = initializeSectionContributionData())
@@ -137,8 +135,8 @@ Error DbiStream::reload() {
return make_error<RawError>(raw_error_code::corrupt_file,
"Found unexpected bytes in DBI Stream.");
- if (ECSubstream.getLength() > 0) {
- BinaryStreamReader ECReader(ECSubstream);
+ if (!ECSubstream.empty()) {
+ BinaryStreamReader ECReader(ECSubstream.StreamData);
if (auto EC = ECNames.reload(ECReader))
return EC;
}
@@ -228,10 +226,10 @@ void DbiStream::visitSectionContributions(
}
Error DbiStream::initializeSectionContributionData() {
- if (SecContrSubstream.getLength() == 0)
+ if (SecContrSubstream.empty())
return Error::success();
- BinaryStreamReader SCReader(SecContrSubstream);
+ BinaryStreamReader SCReader(SecContrSubstream.StreamData);
if (auto EC = SCReader.readEnum(SectionContribVersion))
return EC;
@@ -302,11 +300,33 @@ Error DbiStream::initializeFpoRecords() {
return Error::success();
}
+BinarySubstreamRef DbiStream::getSectionContributionData() const {
+ return SecContrSubstream;
+}
+
+BinarySubstreamRef DbiStream::getSecMapSubstreamData() const {
+ return SecMapSubstream;
+}
+
+BinarySubstreamRef DbiStream::getModiSubstreamData() const {
+ return ModiSubstream;
+}
+
+BinarySubstreamRef DbiStream::getFileInfoSubstreamData() const {
+ return FileInfoSubstream;
+}
+
+BinarySubstreamRef DbiStream::getTypeServerMapSubstreamData() const {
+ return TypeServerMapSubstream;
+}
+
+BinarySubstreamRef DbiStream::getECSubstreamData() const { return ECSubstream; }
+
Error DbiStream::initializeSectionMapData() {
- if (SecMapSubstream.getLength() == 0)
+ if (SecMapSubstream.empty())
return Error::success();
- BinaryStreamReader SMReader(SecMapSubstream);
+ BinaryStreamReader SMReader(SecMapSubstream.StreamData);
const SecMapHeader *Header;
if (auto EC = SMReader.readObject(Header))
return EC;
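reload() now captures each region with readSubstream instead of readStreamRef, so the accessors added above can hand out the raw regions later. A minimal sketch of the BinarySubstreamRef shape these call sites rely on (.StreamData, .empty(), and .size() appear in the diff; the Offset field and the exact header definition are assumptions):

    struct BinarySubstreamRef {
      uint32_t Offset = 0;          // assumed: position within the parent stream
      BinaryStreamRef StreamData;   // the substream's bytes
      uint32_t size() const { return StreamData.getLength(); }
      bool empty() const { return size() == 0; }
    };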
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index e7304b444f23f..aad247ea185f2 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -90,10 +90,14 @@ Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) {
if (ModIter == ModiMap.end())
return make_error<RawError>(raw_error_code::no_entry,
"The specified module was not found");
+ return addModuleSourceFile(*ModIter->second, File);
+}
+
+Error DbiStreamBuilder::addModuleSourceFile(DbiModuleDescriptorBuilder &Module,
+ StringRef File) {
uint32_t Index = SourceFileNames.size();
SourceFileNames.insert(std::make_pair(File, Index));
- auto &ModEntry = *ModIter;
- ModEntry.second->addSourceFile(File);
+ Module.addSourceFile(File);
return Error::success();
}
@@ -233,6 +237,7 @@ Error DbiStreamBuilder::finalize() {
return EC;
DbiStreamHeader *H = Allocator.Allocate<DbiStreamHeader>();
+ ::memset(H, 0, sizeof(DbiStreamHeader));
H->VersionHeader = *VerHeader;
H->VersionSignature = -1;
H->Age = Age;
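Usage sketch for the new builder-keyed overload (names illustrative):

    // Given a DbiModuleDescriptorBuilder &Mod obtained when the module was
    // added, the by-name ModiMap lookup can be skipped entirely:
    if (auto EC = Builder.addModuleSourceFile(Mod, "a.cpp"))
      return EC; // same Error contract as the string-keyed overload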
diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp
index a3979d480bf45..21b66b3e7bcff 100644
--- a/lib/DebugInfo/PDB/Native/InfoStream.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -57,6 +57,10 @@ Error InfoStream::reload() {
uint32_t NewOffset = Reader.getOffset();
NamedStreamMapByteSize = NewOffset - Offset;
+ Reader.setOffset(Offset);
+ if (auto EC = Reader.readSubstream(SubNamedStreams, NamedStreamMapByteSize))
+ return EC;
+
bool Stop = false;
while (!Stop && !Reader.empty()) {
PdbRaw_FeatureSig Sig;
@@ -129,3 +133,7 @@ ArrayRef<PdbRaw_FeatureSig> InfoStream::getFeatureSignatures() const {
const NamedStreamMap &InfoStream::getNamedStreams() const {
return NamedStreams;
}
+
+BinarySubstreamRef InfoStream::getNamedStreamsBuffer() const {
+ return SubNamedStreams;
+}
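The rewind-and-recapture in reload() records the exact byte range the named stream map was parsed from, so getNamedStreamsBuffer() can expose the raw bytes without re-parsing:

    // Offset .............. reader position saved before parsing the map
    // NewOffset ........... reader position after parsing
    // NamedStreamMapByteSize = NewOffset - Offset, re-read as SubNamedStreams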
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 4186f2eb6ba01..83c56574a16e5 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -47,15 +47,19 @@ Error ModuleDebugStreamRef::reload() {
if (auto EC = Reader.readInteger(Signature))
return EC;
- if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4))
+ if (auto EC = Reader.readSubstream(SymbolsSubstream, SymbolSize - 4))
return EC;
-
- if (auto EC = Reader.readStreamRef(C11LinesSubstream, C11Size))
+ if (auto EC = Reader.readSubstream(C11LinesSubstream, C11Size))
+ return EC;
+ if (auto EC = Reader.readSubstream(C13LinesSubstream, C13Size))
return EC;
- if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size))
+
+ BinaryStreamReader SymbolReader(SymbolsSubstream.StreamData);
+ if (auto EC =
+ SymbolReader.readArray(SymbolArray, SymbolReader.bytesRemaining()))
return EC;
- BinaryStreamReader SubsectionsReader(C13LinesSubstream);
+ BinaryStreamReader SubsectionsReader(C13LinesSubstream.StreamData);
if (auto EC = SubsectionsReader.readArray(Subsections,
SubsectionsReader.bytesRemaining()))
return EC;
@@ -63,7 +67,7 @@ Error ModuleDebugStreamRef::reload() {
uint32_t GlobalRefsSize;
if (auto EC = Reader.readInteger(GlobalRefsSize))
return EC;
- if (auto EC = Reader.readStreamRef(GlobalRefsSubstream, GlobalRefsSize))
+ if (auto EC = Reader.readSubstream(GlobalRefsSubstream, GlobalRefsSize))
return EC;
if (Reader.bytesRemaining() > 0)
return make_error<RawError>(raw_error_code::corrupt_file,
@@ -72,9 +76,25 @@ Error ModuleDebugStreamRef::reload() {
return Error::success();
}
+BinarySubstreamRef ModuleDebugStreamRef::getSymbolsSubstream() const {
+ return SymbolsSubstream;
+}
+
+BinarySubstreamRef ModuleDebugStreamRef::getC11LinesSubstream() const {
+ return C11LinesSubstream;
+}
+
+BinarySubstreamRef ModuleDebugStreamRef::getC13LinesSubstream() const {
+ return C13LinesSubstream;
+}
+
+BinarySubstreamRef ModuleDebugStreamRef::getGlobalRefsSubstream() const {
+ return GlobalRefsSubstream;
+}
+
iterator_range<codeview::CVSymbolArray::Iterator>
ModuleDebugStreamRef::symbols(bool *HadError) const {
- return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end());
+ return make_range(SymbolArray.begin(HadError), SymbolArray.end());
}
llvm::iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
@@ -83,7 +103,7 @@ ModuleDebugStreamRef::subsections() const {
}
bool ModuleDebugStreamRef::hasDebugSubsections() const {
- return C13LinesSubstream.getLength() > 0;
+ return !C13LinesSubstream.empty();
}
Error ModuleDebugStreamRef::commit() { return Error::success(); }
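Assumed member split behind the symbols() change, keeping the raw bytes separate from the parsed view:

    BinarySubstreamRef SymbolsSubstream;  // raw bytes, returned by the accessor
    codeview::CVSymbolArray SymbolArray;  // parsed records, drives symbols()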
diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index c7ba32b82bc6b..4f90cd9cd8ac0 100644
--- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -23,6 +23,14 @@
using namespace llvm;
using namespace llvm::pdb;
+// FIXME: This shouldn't be necessary, but if we insert the strings in any
+// other order, cvdump cannot read the generated name map. This suggests that
+// we may be using the wrong hash function. A closer inspection of the cvdump
+// source code may reveal something, but for now this at least makes us work,
+// even if only by accident.
+static constexpr const char *OrderedStreamNames[] = {"/LinkInfo", "/names",
+ "/src/headerblock"};
+
NamedStreamMap::NamedStreamMap() = default;
Error NamedStreamMap::load(BinaryStreamReader &Stream) {
@@ -73,9 +81,10 @@ Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const {
if (auto EC = Writer.writeInteger(FinalizedInfo->StringDataBytes))
return EC;
- // Now all of the string data itself.
- for (const auto &Item : Mapping) {
- if (auto EC = Writer.writeCString(Item.getKey()))
+ for (const auto &Name : OrderedStreamNames) {
+ auto Item = Mapping.find(Name);
+ assert(Item != Mapping.end());
+ if (auto EC = Writer.writeCString(Item->getKey()))
return EC;
}
@@ -93,9 +102,12 @@ uint32_t NamedStreamMap::finalize() {
// Build the finalized hash table.
FinalizedHashTable.clear();
FinalizedInfo.emplace();
- for (const auto &Item : Mapping) {
- FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item.getValue());
- FinalizedInfo->StringDataBytes += Item.getKeyLength() + 1;
+
+ for (const auto &Name : OrderedStreamNames) {
+ auto Item = Mapping.find(Name);
+ assert(Item != Mapping.end());
+ FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item->getValue());
+ FinalizedInfo->StringDataBytes += Item->getKeyLength() + 1;
}
// Number of bytes of string data.
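The fixed order has to agree between finalize(), which records each name's string offset, and commit(), which writes the bytes; if the two walks ever diverge, the serialized hash table points at the wrong strings:

    // invariant: the offset stored for OrderedStreamNames[i] during
    // finalize() equals the byte position commit() writes that name to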
diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index 77f832582f824..180c169ec209c 100644
--- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -9,17 +9,24 @@
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/ADT/STLExtras.h"
+
namespace llvm {
namespace pdb {
NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session,
+ uint32_t SymbolId,
DbiModuleDescriptor MI)
- : NativeRawSymbol(Session), Module(MI) {}
+ : NativeRawSymbol(Session, SymbolId), Module(MI) {}
PDB_SymType NativeCompilandSymbol::getSymTag() const {
return PDB_SymType::Compiland;
}
+std::unique_ptr<NativeRawSymbol> NativeCompilandSymbol::clone() const {
+ return llvm::make_unique<NativeCompilandSymbol>(Session, SymbolId, Module);
+}
+
bool NativeCompilandSymbol::isEditAndContinueEnabled() const {
return Module.hasECInfo();
}
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index 97319fd77d117..c23120041164a 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -34,7 +34,7 @@ NativeEnumModules::getChildAtIndex(uint32_t Index) const {
return nullptr;
return std::unique_ptr<PDBSymbol>(new PDBSymbolCompiland(
Session, std::unique_ptr<IPDBRawSymbol>(new NativeCompilandSymbol(
- Session, Modules.getModuleDescriptor(Index)))));
+ Session, 0, Modules.getModuleDescriptor(Index)))));
}
std::unique_ptr<PDBSymbol> NativeEnumModules::getNext() {
diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index bb52560be167a..6206155b9fb64 100644
--- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
@@ -17,8 +18,12 @@
namespace llvm {
namespace pdb {
-NativeExeSymbol::NativeExeSymbol(NativeSession &Session)
- : NativeRawSymbol(Session), File(Session.getPDBFile()) {}
+NativeExeSymbol::NativeExeSymbol(NativeSession &Session, uint32_t SymbolId)
+ : NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {}
+
+std::unique_ptr<NativeRawSymbol> NativeExeSymbol::clone() const {
+ return llvm::make_unique<NativeExeSymbol>(Session, SymbolId);
+}
std::unique_ptr<IPDBEnumSymbols>
NativeExeSymbol::findChildren(PDB_SymType Type) const {
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 70968d4330b07..ed6db63edbabf 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -22,8 +22,8 @@
using namespace llvm;
using namespace llvm::pdb;
-NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession)
- : Session(PDBSession) {}
+NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, uint32_t SymbolId)
+ : Session(PDBSession), SymbolId(SymbolId) {}
void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {}
@@ -253,9 +253,7 @@ uint32_t NativeRawSymbol::getSubTypeId() const {
std::string NativeRawSymbol::getSymbolsFileName() const { return ""; }
-uint32_t NativeRawSymbol::getSymIndexId() const {
- return 0;
-}
+uint32_t NativeRawSymbol::getSymIndexId() const { return SymbolId; }
uint32_t NativeRawSymbol::getTargetOffset() const {
return 0;
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 7e6843bceb7db..3ab381e76e628 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -70,12 +70,11 @@ uint64_t NativeSession::getLoadAddress() const { return 0; }
void NativeSession::setLoadAddress(uint64_t Address) {}
-std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() const {
- auto RawSymbol =
- llvm::make_unique<NativeExeSymbol>(const_cast<NativeSession &>(*this));
+std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
+ auto RawSymbol = llvm::make_unique<NativeExeSymbol>(*this, 0);
auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
std::unique_ptr<PDBSymbolExe> ExeSymbol(
- static_cast<PDBSymbolExe *>(PdbSymbol.release()));
+ static_cast<PDBSymbolExe *>(PdbSymbol.release()));
return ExeSymbol;
}
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index a9597cdf4c4d3..4f6ebb0cb3428 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -230,6 +230,14 @@ ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
return ContainerLayout.DirectoryBlocks;
}
+MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
+ MSFStreamLayout Result;
+ auto Blocks = getStreamBlockList(StreamIdx);
+ Result.Blocks.assign(Blocks.begin(), Blocks.end());
+ Result.Length = getStreamByteSize(StreamIdx);
+ return Result;
+}
+
Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
if (!Globals) {
auto DbiS = getPDBDbiStream();
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 67c803d3124ec..f917ef91f6396 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -66,7 +66,13 @@ Error TpiStream::reload() {
"TPI Stream Invalid number of hash buckets.");
// The actual type records themselves come from this stream
- if (auto EC = Reader.readArray(TypeRecords, Header->TypeRecordBytes))
+ if (auto EC =
+ Reader.readSubstream(TypeRecordsSubstream, Header->TypeRecordBytes))
+ return EC;
+
+ BinaryStreamReader RecordReader(TypeRecordsSubstream.StreamData);
+ if (auto EC =
+ RecordReader.readArray(TypeRecords, TypeRecordsSubstream.size()))
return EC;
// Hash indices, hash values, etc come from the hash stream.
@@ -135,6 +141,10 @@ uint16_t TpiStream::getTypeHashStreamAuxIndex() const {
uint32_t TpiStream::getNumHashBuckets() const { return Header->NumHashBuckets; }
uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; }
+BinarySubstreamRef TpiStream::getTypeRecordsSubstream() const {
+ return TypeRecordsSubstream;
+}
+
FixedStreamArray<support::ulittle32_t> TpiStream::getHashValues() const {
return HashValues;
}
diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index 8dcd49aaab5bb..5fe259f80b6fb 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -12,6 +12,24 @@
using namespace llvm;
+LLVMSharedModuleRef LLVMOrcMakeSharedModule(LLVMModuleRef Mod) {
+ return wrap(new std::shared_ptr<Module>(unwrap(Mod)));
+}
+
+void LLVMOrcDisposeSharedModuleRef(LLVMSharedModuleRef SharedMod) {
+ delete unwrap(SharedMod);
+}
+
+LLVMSharedObjectBufferRef
+LLVMOrcMakeSharedObjectBuffer(LLVMMemoryBufferRef ObjBuffer) {
+ return wrap(new std::shared_ptr<MemoryBuffer>(unwrap(ObjBuffer)));
+}
+
+void
+LLVMOrcDisposeSharedObjectBufferRef(LLVMSharedObjectBufferRef SharedObjBuffer) {
+ delete unwrap(SharedObjBuffer);
+}
+
LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) {
TargetMachine *TM2(unwrap(TM));
@@ -65,21 +83,23 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
}
LLVMOrcModuleHandle
-LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMSharedModuleRef Mod,
LLVMOrcSymbolResolverFn SymbolResolver,
void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
- Module *M(unwrap(Mod));
- return J.addIRModuleEager(M, SymbolResolver, SymbolResolverCtx);
+ std::shared_ptr<Module> *M(unwrap(Mod));
+ return J.addIRModuleEager(*M, SymbolResolver, SymbolResolverCtx);
}
LLVMOrcModuleHandle
-LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod,
+LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMSharedModuleRef Mod,
LLVMOrcSymbolResolverFn SymbolResolver,
void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
- Module *M(unwrap(Mod));
- return J.addIRModuleLazy(M, SymbolResolver, SymbolResolverCtx);
+ std::shared_ptr<Module> *M(unwrap(Mod));
+ return J.addIRModuleLazy(*M, SymbolResolver, SymbolResolverCtx);
}
void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) {
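Minimal usage sketch of the new shared-module C API. LLVMOrcMakeSharedModule takes ownership of Mod, and the bindings stack copies the shared_ptr when the module is added, so the caller's ref can be disposed right away:

    LLVMSharedModuleRef SM = LLVMOrcMakeSharedModule(Mod);
    LLVMOrcModuleHandle H =
        LLVMOrcAddEagerlyCompiledIR(JIT, SM, SymbolResolver, ResolverCtx);
    LLVMOrcDisposeSharedModuleRef(SM); // the JIT retains its own copy
    // ... later, when the code is no longer needed:
    LLVMOrcRemoveModule(JIT, H);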
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index a79dd844bf4f7..931d0a9eb2ade 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -1,4 +1,4 @@
-//===--- OrcCBindingsStack.h - Orc JIT stack for C bindings ---*- C++ -*---===//
+//===- OrcCBindingsStack.h - Orc JIT stack for C bindings -----*- C++ -*---===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,39 +11,63 @@
#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
#include "llvm-c/OrcBindings.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
namespace llvm {
class OrcCBindingsStack;
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::shared_ptr<Module>,
+ LLVMSharedModuleRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::shared_ptr<MemoryBuffer>,
+ LLVMSharedObjectBufferRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcCBindingsStack, LLVMOrcJITStackRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)
class OrcCBindingsStack {
public:
- typedef orc::JITCompileCallbackManager CompileCallbackMgr;
- typedef orc::RTDyldObjectLinkingLayer<> ObjLayerT;
- typedef orc::IRCompileLayer<ObjLayerT> CompileLayerT;
- typedef orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>
- CODLayerT;
- typedef std::function<std::unique_ptr<CompileCallbackMgr>()>
- CallbackManagerBuilder;
+ using CompileCallbackMgr = orc::JITCompileCallbackManager;
+ using ObjLayerT = orc::RTDyldObjectLinkingLayer;
+ using CompileLayerT = orc::IRCompileLayer<ObjLayerT, orc::SimpleCompiler>;
+ using CODLayerT =
+ orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>;
- typedef CODLayerT::IndirectStubsManagerBuilderT IndirectStubsManagerBuilder;
+ using CallbackManagerBuilder =
+ std::function<std::unique_ptr<CompileCallbackMgr>()>;
+
+ using IndirectStubsManagerBuilder = CODLayerT::IndirectStubsManagerBuilderT;
private:
class GenericHandle {
public:
- virtual ~GenericHandle() {}
+ virtual ~GenericHandle() = default;
+
virtual JITSymbol findSymbolIn(const std::string &Name,
bool ExportedSymbolsOnly) = 0;
virtual void removeModule() = 0;
@@ -51,7 +75,7 @@ private:
template <typename LayerT> class GenericHandleImpl : public GenericHandle {
public:
- GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle)
+ GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleHandleT Handle)
: Layer(Layer), Handle(std::move(Handle)) {}
JITSymbol findSymbolIn(const std::string &Name,
@@ -59,31 +83,28 @@ private:
return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly);
}
- void removeModule() override { return Layer.removeModuleSet(Handle); }
+ void removeModule() override { return Layer.removeModule(Handle); }
private:
LayerT &Layer;
- typename LayerT::ModuleSetHandleT Handle;
+ typename LayerT::ModuleHandleT Handle;
};
template <typename LayerT>
std::unique_ptr<GenericHandleImpl<LayerT>>
- createGenericHandle(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) {
+ createGenericHandle(LayerT &Layer, typename LayerT::ModuleHandleT Handle) {
return llvm::make_unique<GenericHandleImpl<LayerT>>(Layer,
std::move(Handle));
}
public:
- // We need a 'ModuleSetHandleT' to conform to the layer concept.
- typedef unsigned ModuleSetHandleT;
-
- typedef unsigned ModuleHandleT;
+ using ModuleHandleT = unsigned;
OrcCBindingsStack(TargetMachine &TM,
std::unique_ptr<CompileCallbackMgr> CCMgr,
IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
: DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()),
- CCMgr(std::move(CCMgr)), ObjectLayer(),
+ CCMgr(std::move(CCMgr)),
CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
CODLayer(CompileLayer,
[](Function &F) { return std::set<Function *>({&F}); },
@@ -153,7 +174,7 @@ public:
if (ExternalResolver)
return JITSymbol(
ExternalResolver(Name.c_str(), ExternalResolverCtx),
- llvm::JITSymbolFlags::Exported);
+ JITSymbolFlags::Exported);
return JITSymbol(nullptr);
},
@@ -163,11 +184,10 @@ public:
}
template <typename LayerT>
- ModuleHandleT addIRModule(LayerT &Layer, Module *M,
+ ModuleHandleT addIRModule(LayerT &Layer, std::shared_ptr<Module> M,
std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
-
// Attach a data-layout if one isn't already present.
if (M->getDataLayout().isDefault())
M->setDataLayout(DL);
@@ -184,11 +204,8 @@ public:
auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx);
// Add the module to the JIT.
- std::vector<Module *> S;
- S.push_back(std::move(M));
-
- auto LH = Layer.addModuleSet(std::move(S), std::move(MemMgr),
- std::move(Resolver));
+ auto LH = Layer.addModule(std::move(M), std::move(MemMgr),
+ std::move(Resolver));
ModuleHandleT H = createHandle(Layer, LH);
// Run the static constructors, and save the static destructor runner for
@@ -201,7 +218,7 @@ public:
return H;
}
- ModuleHandleT addIRModuleEager(Module *M,
+ ModuleHandleT addIRModuleEager(std::shared_ptr<Module> M,
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
return addIRModule(CompileLayer, std::move(M),
@@ -209,7 +226,7 @@ public:
std::move(ExternalResolver), ExternalResolverCtx);
}
- ModuleHandleT addIRModuleLazy(Module *M,
+ ModuleHandleT addIRModuleLazy(std::shared_ptr<Module> M,
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
return addIRModule(CODLayer, std::move(M),
@@ -238,8 +255,7 @@ public:
private:
template <typename LayerT>
- unsigned createHandle(LayerT &Layer,
- typename LayerT::ModuleSetHandleT Handle) {
+ unsigned createHandle(LayerT &Layer, typename LayerT::ModuleHandleT Handle) {
unsigned NewHandle;
if (!FreeHandleIndexes.empty()) {
NewHandle = FreeHandleIndexes.back();
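This header tracks the layer-concept change from module sets to single modules; the shape change, side by side:

    // old: ModuleSetHandleT addModuleSet(std::vector<Module *>, MemMgr, Resolver)
    //      removeModuleSet(Handle)
    // new: ModuleHandleT    addModule(std::shared_ptr<Module>, MemMgr, Resolver)
    //      removeModule(Handle)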
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index b7a68e041c128..f89f21adff417 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -124,5 +124,10 @@ OrcMCJITReplacement::runFunction(Function *F,
llvm_unreachable("Full-featured argument passing not supported yet!");
}
+void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
+ for (auto &M : LocalModules)
+ ExecutionEngine::runStaticConstructorsDestructors(*M, isDtors);
+}
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 7dd6b17d33cb4..b20690c7caafc 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -1,4 +1,4 @@
-//===---- OrcMCJITReplacement.h - Orc based MCJIT replacement ---*- C++ -*-===//
+//===- OrcMCJITReplacement.h - Orc based MCJIT replacement ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,9 +24,12 @@
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
@@ -45,6 +48,9 @@
#include <vector>
namespace llvm {
+
+class ObjectCache;
+
namespace orc {
class OrcMCJITReplacement : public ExecutionEngine {
@@ -151,7 +157,6 @@ class OrcMCJITReplacement : public ExecutionEngine {
};
private:
-
static ExecutionEngine *
createOrcMCJITReplacement(std::string *ErrorMsg,
std::shared_ptr<MCJITMemoryManager> MemMgr,
@@ -162,10 +167,6 @@ private:
}
public:
- static void Register() {
- OrcMCJITReplacementCtor = createOrcMCJITReplacement;
- }
-
OrcMCJITReplacement(
std::shared_ptr<MCJITMemoryManager> MemMgr,
std::shared_ptr<JITSymbolResolver> ClientResolver,
@@ -178,8 +179,11 @@ public:
CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
LazyEmitLayer(CompileLayer) {}
- void addModule(std::unique_ptr<Module> M) override {
+ static void Register() {
+ OrcMCJITReplacementCtor = createOrcMCJITReplacement;
+ }
+ void addModule(std::unique_ptr<Module> M) override {
// If this module doesn't have a DataLayout attached then attach the
// default.
if (M->getDataLayout().isDefault()) {
@@ -187,29 +191,44 @@ public:
} else {
assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch");
}
- Modules.push_back(std::move(M));
- std::vector<Module *> Ms;
- Ms.push_back(&*Modules.back());
- LazyEmitLayer.addModuleSet(std::move(Ms), &MemMgr, &Resolver);
+ auto *MPtr = M.release();
+ ShouldDelete[MPtr] = true;
+ auto Deleter =
+ [this](Module *Mod) {
+ if (ShouldDelete[Mod])
+ delete Mod;
+ };
+ LocalModules.push_back(std::shared_ptr<Module>(MPtr, std::move(Deleter)));
+ LazyEmitLayer.addModule(LocalModules.back(), &MemMgr, &Resolver);
}
void addObjectFile(std::unique_ptr<object::ObjectFile> O) override {
- std::vector<std::unique_ptr<object::ObjectFile>> Objs;
- Objs.push_back(std::move(O));
- ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
+ auto Obj =
+ std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O),
+ nullptr);
+ ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
}
void addObjectFile(object::OwningBinary<object::ObjectFile> O) override {
- std::vector<std::unique_ptr<object::OwningBinary<object::ObjectFile>>> Objs;
- Objs.push_back(
- llvm::make_unique<object::OwningBinary<object::ObjectFile>>(
- std::move(O)));
- ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
+ auto Obj =
+ std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O));
+ ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
}
void addArchive(object::OwningBinary<object::Archive> A) override {
Archives.push_back(std::move(A));
}
+
+ bool removeModule(Module *M) override {
+ for (auto I = LocalModules.begin(), E = LocalModules.end(); I != E; ++I) {
+ if (I->get() == M) {
+ ShouldDelete[M] = false;
+ LocalModules.erase(I);
+ return true;
+ }
+ }
+ return false;
+ }
uint64_t getSymbolAddress(StringRef Name) {
return findSymbol(Name).getAddress();
@@ -256,13 +275,15 @@ public:
ArrayRef<GenericValue> ArgValues) override;
void setObjectCache(ObjectCache *NewCache) override {
- CompileLayer.setObjectCache(NewCache);
+ CompileLayer.getCompiler().setObjectCache(NewCache);
}
void setProcessAllSections(bool ProcessAllSections) override {
ObjectLayer.setProcessAllSections(ProcessAllSections);
}
+ void runStaticConstructorsDestructors(bool isDtors) override;
+
private:
JITSymbol findMangledSymbol(StringRef Name) {
if (auto Sym = LazyEmitLayer.findSymbol(Name, false))
@@ -294,10 +315,12 @@ private:
}
std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
if (ChildBin->isObject()) {
- std::vector<std::unique_ptr<object::ObjectFile>> ObjSet;
- ObjSet.push_back(std::unique_ptr<object::ObjectFile>(
- static_cast<object::ObjectFile *>(ChildBin.release())));
- ObjectLayer.addObjectSet(std::move(ObjSet), &MemMgr, &Resolver);
+ std::unique_ptr<object::ObjectFile> ChildObj(
+ static_cast<object::ObjectFile*>(ChildBinOrErr->release()));
+ auto Obj =
+ std::make_shared<object::OwningBinary<object::ObjectFile>>(
+ std::move(ChildObj), nullptr);
+ ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver);
if (auto Sym = ObjectLayer.findSymbol(Name, true))
return Sym;
}
@@ -308,34 +331,19 @@ private:
class NotifyObjectLoadedT {
public:
- typedef std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>
- LoadedObjInfoListT;
+ using LoadedObjInfoListT =
+ std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>;
NotifyObjectLoadedT(OrcMCJITReplacement &M) : M(M) {}
- template <typename ObjListT>
- void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H,
- const ObjListT &Objects,
- const LoadedObjInfoListT &Infos) const {
+ void operator()(RTDyldObjectLinkingLayerBase::ObjHandleT H,
+ const RTDyldObjectLinkingLayer::ObjectPtr &Obj,
+ const LoadedObjectInfo &Info) const {
M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad);
M.SectionsAllocatedSinceLastLoad = SectionAddrSet();
- assert(Objects.size() == Infos.size() &&
- "Incorrect number of Infos for Objects.");
- for (unsigned I = 0; I < Objects.size(); ++I)
- M.MemMgr.notifyObjectLoaded(&M, getObject(*Objects[I]));
+ M.MemMgr.notifyObjectLoaded(&M, *Obj->getBinary());
}
-
private:
- static const object::ObjectFile& getObject(const object::ObjectFile &Obj) {
- return Obj;
- }
-
- template <typename ObjT>
- static const object::ObjectFile&
- getObject(const object::OwningBinary<ObjT> &Obj) {
- return *Obj.getBinary();
- }
-
OrcMCJITReplacement &M;
};
@@ -343,7 +351,7 @@ private:
public:
NotifyFinalizedT(OrcMCJITReplacement &M) : M(M) {}
- void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H) {
+ void operator()(RTDyldObjectLinkingLayerBase::ObjHandleT H) {
M.UnfinalizedSections.erase(H);
}
@@ -360,9 +368,9 @@ private:
return MangledName;
}
- typedef RTDyldObjectLinkingLayer<NotifyObjectLoadedT> ObjectLayerT;
- typedef IRCompileLayer<ObjectLayerT> CompileLayerT;
- typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT;
+ using ObjectLayerT = RTDyldObjectLinkingLayer;
+ using CompileLayerT = IRCompileLayer<ObjectLayerT, orc::SimpleCompiler>;
+ using LazyEmitLayerT = LazyEmittingLayer<CompileLayerT>;
std::unique_ptr<TargetMachine> TM;
MCJITReplacementMemMgr MemMgr;
@@ -380,21 +388,24 @@ private:
// We need to store ObjLayerT::ObjSetHandles for each of the object sets
// that have been emitted but not yet finalized so that we can forward the
// mapSectionAddress calls appropriately.
- typedef std::set<const void *> SectionAddrSet;
- struct ObjSetHandleCompare {
- bool operator()(ObjectLayerT::ObjSetHandleT H1,
- ObjectLayerT::ObjSetHandleT H2) const {
+ using SectionAddrSet = std::set<const void *>;
+ struct ObjHandleCompare {
+ bool operator()(ObjectLayerT::ObjHandleT H1,
+ ObjectLayerT::ObjHandleT H2) const {
return &*H1 < &*H2;
}
};
SectionAddrSet SectionsAllocatedSinceLastLoad;
- std::map<ObjectLayerT::ObjSetHandleT, SectionAddrSet, ObjSetHandleCompare>
+ std::map<ObjectLayerT::ObjHandleT, SectionAddrSet, ObjHandleCompare>
UnfinalizedSections;
+ std::map<Module*, bool> ShouldDelete;
+ std::vector<std::shared_ptr<Module>> LocalModules;
std::vector<object::OwningBinary<object::Archive>> Archives;
};
} // end namespace orc
+
} // end namespace llvm
#endif // LLVM_LIB_EXECUTIONENGINE_ORC_MCJITREPLACEMENT_H
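The addModule/removeModule pair above implements owned-until-removed semantics on top of shared_ptr with a custom deleter: the ShouldDelete flag decides at destruction time whether the Module dies with the JIT or has been handed back to the caller. A self-contained sketch of the same idiom (illustrative names, outside LLVM):

    #include <map>
    #include <memory>

    struct Mod {};                        // stand-in for llvm::Module
    static std::map<Mod *, bool> ShouldDelete;

    std::shared_ptr<Mod> adopt(std::unique_ptr<Mod> M) {
      Mod *P = M.release();
      ShouldDelete[P] = true;             // default: the JIT owns it
      return std::shared_ptr<Mod>(P, [](Mod *Ptr) {
        if (ShouldDelete[Ptr])            // removeModule() flips this to false
          delete Ptr;                     // before dropping its reference
      });
    }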
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 556e122ff82ff..c7f112887a306 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1,5 +1,4 @@
-
-//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//===- AsmWriter.cpp - Printing LLVM as an assembly file ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,63 +14,105 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/Use.h"
#include "llvm/IR/UseListOrder.h"
-#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
// Make virtual table appear in this compilation unit.
-AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default;
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
namespace {
+
struct OrderMap {
DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
unsigned size() const { return IDs.size(); }
std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }
+
std::pair<unsigned, bool> lookup(const Value *V) const {
return IDs.lookup(V);
}
+
void index(const Value *V) {
// Explicitly sequence get-size and insert-value operations to avoid UB.
unsigned ID = IDs.size() + 1;
IDs[V].first = ID;
}
};
-}
+
+} // end anonymous namespace
static void orderValue(const Value *V, OrderMap &OM) {
if (OM.lookup(V).first)
@@ -139,7 +180,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
unsigned ID, const OrderMap &OM,
UseListOrderStack &Stack) {
// Predict use-list order for this one.
- typedef std::pair<const Use *, unsigned> Entry;
+ using Entry = std::pair<const Use *, unsigned>;
SmallVector<Entry, 64> List;
for (const Use &U : V->uses())
// Check if this user will be serialized.
@@ -421,13 +462,10 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) {
isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
}
-
namespace {
+
class TypePrinting {
- TypePrinting(const TypePrinting &) = delete;
- void operator=(const TypePrinting&) = delete;
public:
-
/// NamedTypes - The named types that are used by the current module.
TypeFinder NamedTypes;
@@ -435,6 +473,8 @@ public:
DenseMap<StructType*, unsigned> NumberedTypes;
TypePrinting() = default;
+ TypePrinting(const TypePrinting &) = delete;
+ TypePrinting &operator=(const TypePrinting &) = delete;
void incorporateTypes(const Module &M);
@@ -442,7 +482,8 @@ public:
void printStructBody(StructType *Ty, raw_ostream &OS);
};
-} // namespace
+
+} // end anonymous namespace
void TypePrinting::incorporateTypes(const Module &M) {
NamedTypes.run(M, false);
@@ -574,6 +615,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
}
namespace llvm {
+
//===----------------------------------------------------------------------===//
// SlotTracker Class: Enumerate slot numbers for unnamed values
//===----------------------------------------------------------------------===//
@@ -582,32 +624,33 @@ namespace llvm {
class SlotTracker {
public:
/// ValueMap - A mapping of Values to slot numbers.
- typedef DenseMap<const Value*, unsigned> ValueMap;
+ using ValueMap = DenseMap<const Value *, unsigned>;
private:
/// TheModule - The module for which we are holding slot numbers.
const Module* TheModule;
/// TheFunction - The function for which we are holding slot numbers.
- const Function* TheFunction;
- bool FunctionProcessed;
+ const Function* TheFunction = nullptr;
+ bool FunctionProcessed = false;
bool ShouldInitializeAllMetadata;
/// mMap - The slot map for the module level data.
ValueMap mMap;
- unsigned mNext;
+ unsigned mNext = 0;
/// fMap - The slot map for the function level data.
ValueMap fMap;
- unsigned fNext;
+ unsigned fNext = 0;
/// mdnMap - Map for MDNodes.
DenseMap<const MDNode*, unsigned> mdnMap;
- unsigned mdnNext;
+ unsigned mdnNext = 0;
/// asMap - The slot map for attribute sets.
DenseMap<AttributeSet, unsigned> asMap;
- unsigned asNext;
+ unsigned asNext = 0;
+
public:
/// Construct from a module.
///
@@ -616,6 +659,7 @@ public:
/// within a function (even if no functions have been initialized).
explicit SlotTracker(const Module *M,
bool ShouldInitializeAllMetadata = false);
+
/// Construct from a function, starting out in incorp state.
///
/// If \c ShouldInitializeAllMetadata, initializes all metadata in all
@@ -624,6 +668,9 @@ public:
explicit SlotTracker(const Function *F,
bool ShouldInitializeAllMetadata = false);
+ SlotTracker(const SlotTracker &) = delete;
+ SlotTracker &operator=(const SlotTracker &) = delete;
+
/// Return the slot number of the specified value in it's type
/// plane. If something is not in the SlotTracker, return -1.
int getLocalSlot(const Value *V);
@@ -646,14 +693,16 @@ public:
void purgeFunction();
/// MDNode map iterators.
- typedef DenseMap<const MDNode*, unsigned>::iterator mdn_iterator;
+ using mdn_iterator = DenseMap<const MDNode*, unsigned>::iterator;
+
mdn_iterator mdn_begin() { return mdnMap.begin(); }
mdn_iterator mdn_end() { return mdnMap.end(); }
unsigned mdn_size() const { return mdnMap.size(); }
bool mdn_empty() const { return mdnMap.empty(); }
/// AttributeSet map iterators.
- typedef DenseMap<AttributeSet, unsigned>::iterator as_iterator;
+ using as_iterator = DenseMap<AttributeSet, unsigned>::iterator;
+
as_iterator as_begin() { return asMap.begin(); }
as_iterator as_end() { return asMap.end(); }
unsigned as_size() const { return asMap.size(); }
@@ -691,11 +740,9 @@ private:
/// Add all of the metadata from an instruction.
void processInstructionMetadata(const Instruction &I);
-
- SlotTracker(const SlotTracker &) = delete;
- void operator=(const SlotTracker &) = delete;
};
-} // namespace llvm
+
+} // end namespace llvm
ModuleSlotTracker::ModuleSlotTracker(SlotTracker &Machine, const Module *M,
const Function *F)
@@ -706,7 +753,7 @@ ModuleSlotTracker::ModuleSlotTracker(const Module *M,
: ShouldCreateStorage(M),
ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), M(M) {}
-ModuleSlotTracker::~ModuleSlotTracker() {}
+ModuleSlotTracker::~ModuleSlotTracker() = default;
SlotTracker *ModuleSlotTracker::getMachine() {
if (!ShouldCreateStorage)
@@ -773,17 +820,13 @@ static SlotTracker *createSlotTracker(const Value *V) {
// Module level constructor. Causes the contents of the Module (sans functions)
// to be added to the slot table.
SlotTracker::SlotTracker(const Module *M, bool ShouldInitializeAllMetadata)
- : TheModule(M), TheFunction(nullptr), FunctionProcessed(false),
- ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
- fNext(0), mdnNext(0), asNext(0) {}
+ : TheModule(M), ShouldInitializeAllMetadata(ShouldInitializeAllMetadata) {}
// Function level constructor. Causes the contents of the Module and the one
// function provided to be added to the slot table.
SlotTracker::SlotTracker(const Function *F, bool ShouldInitializeAllMetadata)
: TheModule(F ? F->getParent() : nullptr), TheFunction(F),
- FunctionProcessed(false),
- ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
- fNext(0), mdnNext(0), asNext(0) {}
+ ShouldInitializeAllMetadata(ShouldInitializeAllMetadata) {}
inline void SlotTracker::initialize() {
if (TheModule) {
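
The slimmed-down constructors work because the fields now carry C++11 default
member initializers: any constructor that omits a field from its init list
picks up the declaration-site value. A minimal standalone sketch of the
pattern (names are illustrative, not from the patch):

    struct Tracker {
      const void *TheModule;          // still set explicitly per constructor
      unsigned mNext = 0;             // defaulted where it is declared
      bool FunctionProcessed = false;
      explicit Tracker(const void *M) : TheModule(M) {}
      // mNext and FunctionProcessed silently take 0 and false here.
    };
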
@@ -949,7 +992,6 @@ int SlotTracker::getMetadataSlot(const MDNode *N) {
return MI == mdnMap.end() ? -1 : (int)MI->second;
}
-
/// getLocalSlot - Get the slot number for a value that is local to a function.
int SlotTracker::getLocalSlot(const Value *V) {
assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
@@ -1248,7 +1290,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
-
if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
if (CS->getType()->isPacked())
Out << '<';
@@ -1381,11 +1422,14 @@ static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
}
namespace {
+
struct FieldSeparator {
- bool Skip;
+ bool Skip = true;
const char *Sep;
- FieldSeparator(const char *Sep = ", ") : Skip(true), Sep(Sep) {}
+
+ FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
};
+
raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
if (FS.Skip) {
FS.Skip = false;
@@ -1393,19 +1437,20 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
}
return OS << FS.Sep;
}
+
struct MDFieldPrinter {
raw_ostream &Out;
FieldSeparator FS;
- TypePrinting *TypePrinter;
- SlotTracker *Machine;
- const Module *Context;
+ TypePrinting *TypePrinter = nullptr;
+ SlotTracker *Machine = nullptr;
+ const Module *Context = nullptr;
- explicit MDFieldPrinter(raw_ostream &Out)
- : Out(Out), TypePrinter(nullptr), Machine(nullptr), Context(nullptr) {}
+ explicit MDFieldPrinter(raw_ostream &Out) : Out(Out) {}
MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter,
SlotTracker *Machine, const Module *Context)
: Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
}
+
void printTag(const DINode *N);
void printMacinfoType(const DIMacroNode *N);
void printChecksumKind(const DIFile *N);
@@ -1422,7 +1467,8 @@ struct MDFieldPrinter {
bool ShouldSkipZero = true);
void printEmissionKind(StringRef Name, DICompileUnit::DebugEmissionKind EK);
};
-} // end namespace
+
+} // end anonymous namespace
void MDFieldPrinter::printTag(const DINode *N) {
Out << FS << "tag: ";
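
The Out << FS << "tag: " line above is the FieldSeparator idiom in action:
streaming the separator object prints nothing on its first use and Sep
(default ", ") on every later one, so printed field lists never begin with a
stray comma. A short sketch, assuming a raw_ostream OS and the struct defined
above:

    FieldSeparator FS;
    for (int V : {1, 2, 3})
      OS << FS << V;   // emits "1, 2, 3": the first FS prints nothing
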
@@ -1518,7 +1564,6 @@ void MDFieldPrinter::printEmissionKind(StringRef Name,
Out << FS << Name << ": " << DICompileUnit::EmissionKindString(EK);
}
-
template <class IntTy, class Stringifier>
void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
Stringifier toString, bool ShouldSkipZero) {
@@ -1923,7 +1968,6 @@ static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N,
Out << ")";
}
-
static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
TypePrinting *TypePrinter,
SlotTracker *Machine,
@@ -2062,6 +2106,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
}
namespace {
+
class AssemblyWriter {
formatted_raw_ostream &Out;
const Module *TheModule;
@@ -2125,7 +2170,8 @@ private:
// intrinsic indicating base and derived pointer names.
void printGCRelocateComment(const GCRelocateInst &Relocate);
};
-} // namespace
+
+} // end anonymous namespace
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const Module *M, AssemblyAnnotationWriter *AAW,
@@ -2594,7 +2640,6 @@ void AssemblyWriter::printTypeIdentities() {
}
/// printFunction - Print all aspects of a function.
-///
void AssemblyWriter::printFunction(const Function *F) {
// Print out the return type and name.
Out << '\n';
@@ -2730,7 +2775,6 @@ void AssemblyWriter::printFunction(const Function *F) {
/// printArgument - This member is called for every argument that is passed into
/// the function. Simply print it out
-///
void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
// Output type...
TypePrinter.print(Arg->getType(), Out);
@@ -2747,7 +2791,6 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
}
/// printBasicBlock - This member is called for each basic block in a method.
-///
void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
if (BB->hasName()) { // Print out the label if it exists...
Out << "\n";
@@ -2813,7 +2856,6 @@ void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) {
/// printInfoComment - Print a little comment after the instruction indicating
/// which slot it occupies.
-///
void AssemblyWriter::printInfoComment(const Value &V) {
if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V))
printGCRelocateComment(*Relocate);
@@ -3046,7 +3088,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
writeOperandBundles(CI);
-
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
Operand = II->getCalledValue();
FunctionType *FTy = II->getFunctionType();
@@ -3087,7 +3128,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(II->getNormalDest(), true);
Out << " unwind ";
writeOperand(II->getUnwindDest(), true);
-
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Out << ' ';
if (AI->isUsedWithInAlloca())
@@ -3113,7 +3153,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (AddrSpace != 0) {
Out << ", addrspace(" << AddrSpace << ')';
}
-
} else if (isa<CastInst>(I)) {
if (Operand) {
Out << ' ';
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 4ed7b021883de..9c7b61f679236 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -100,6 +100,7 @@ public:
class EnumAttributeImpl : public AttributeImpl {
virtual void anchor();
+
Attribute::AttrKind Kind;
protected:
@@ -133,6 +134,7 @@ public:
class StringAttributeImpl : public AttributeImpl {
virtual void anchor();
+
std::string Kind;
std::string Val;
@@ -243,7 +245,8 @@ public:
return AvailableFunctionAttrs & ((uint64_t)1) << Kind;
}
- typedef const AttributeSet *iterator;
+ using iterator = const AttributeSet *;
+
iterator begin() const { return getTrailingObjects<AttributeSet>(); }
iterator end() const { return begin() + NumAttrSets; }
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index a518f7b5c81a8..8f2e641d64b92 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1638,6 +1638,39 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
Caller.addFnAttr(Attribute::StackProtect);
}
+/// \brief If the inlined function requires stack probes, then ensure that
+/// the calling function has those too.
+static void adjustCallerStackProbes(Function &Caller, const Function &Callee) {
+ if (!Caller.hasFnAttribute("probe-stack") &&
+ Callee.hasFnAttribute("probe-stack")) {
+ Caller.addFnAttr(Callee.getFnAttribute("probe-stack"));
+ }
+}
+
+/// \brief If the inlined function defines the size of the guard region
+/// on the stack, then ensure that the calling function defines a guard region
+/// that is no larger.
+static void
+adjustCallerStackProbeSize(Function &Caller, const Function &Callee) {
+ if (Callee.hasFnAttribute("stack-probe-size")) {
+ uint64_t CalleeStackProbeSize;
+ Callee.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, CalleeStackProbeSize);
+ if (Caller.hasFnAttribute("stack-probe-size")) {
+ uint64_t CallerStackProbeSize;
+ Caller.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, CallerStackProbeSize);
+ if (CallerStackProbeSize > CalleeStackProbeSize) {
+ Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size"));
+ }
+ } else {
+ Caller.addFnAttr(Callee.getFnAttribute("stack-probe-size"));
+ }
+ }
+}
+
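
Taken together, the two helpers keep the caller's stack-probing contract at
least as strict as the inlined callee's; for "stack-probe-size" the effective
rule is min(caller, callee), with an absent attribute treated as
unconstrained. A standalone model of that rule (illustrative code, not the
LLVM attribute API):

    #include <algorithm>
    #include <cstdint>
    #include <optional>

    std::optional<uint64_t> mergeProbeSize(std::optional<uint64_t> Caller,
                                           std::optional<uint64_t> Callee) {
      if (!Callee)
        return Caller;                   // callee imposes nothing
      if (!Caller)
        return Callee;                   // caller adopts callee's guard size
      return std::min(*Caller, *Callee); // smaller guard region wins
    }
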
#define GET_ATTR_COMPAT_FUNC
#include "AttributesCompatFunc.inc"
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 06934b365a11b..6a4b8032ffd54 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -142,6 +142,11 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
+ Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name == "avx512.mask.add.pd.128" || // Added in 4.0
Name == "avx512.mask.add.pd.256" || // Added in 4.0
Name == "avx512.mask.add.ps.128" || // Added in 4.0
@@ -783,12 +788,30 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
}
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
- ICmpInst::Predicate Pred) {
+ unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = Op0->getType()->getVectorNumElements();
- Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
- Value *Mask = CI.getArgOperand(2);
+ Value *Cmp;
+ if (CC == 3) {
+ Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
+ } else if (CC == 7) {
+ Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
+ } else {
+ ICmpInst::Predicate Pred;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case 0: Pred = ICmpInst::ICMP_EQ; break;
+ case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 4: Pred = ICmpInst::ICMP_NE; break;
+ case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ }
+ Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
+ }
+
+ Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
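
The CC immediate follows the 3-bit x86 pcmp condition-code encoding, with 3
and 7 short-circuited above to constant all-false and all-true masks, and
signedness selected by the cmp/ucmp intrinsic family. A self-contained model
of the decoding (mirrors the switch; the function name is illustrative):

    #include <cassert>

    // Maps a pcmp CC immediate plus signedness to the predicate the
    // upgraded IR will use.
    const char *ccName(unsigned CC, bool Signed) {
      switch (CC) {
      case 0: return "eq";
      case 1: return Signed ? "slt" : "ult";
      case 2: return Signed ? "sle" : "ule";
      case 3: return "always-false";    // becomes a zero mask
      case 4: return "ne";
      case 5: return Signed ? "sge" : "uge";
      case 6: return Signed ? "sgt" : "ugt";
      case 7: return "always-true";     // becomes an all-ones mask
      }
      assert(false && "CC is a 3-bit immediate");
      return nullptr;
    }
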
@@ -1007,9 +1030,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
- Rep = upgradeMaskedCompare(Builder, *CI,
- CmpEq ? ICmpInst::ICMP_EQ
- : ICmpInst::ICMP_SGT);
+ Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+ } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
+ } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 1f8659d4e2cae..2b780adf6c69c 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -355,6 +355,19 @@ bool BasicBlock::canSplitPredecessors() const {
return true;
}
+bool BasicBlock::isLegalToHoistInto() const {
+ auto *Term = getTerminator();
+ // No terminator means the block is under construction.
+ if (!Term)
+ return true;
+
+ // If the block has no successors, there can be no instructions to hoist.
+ assert(Term->getNumSuccessors() > 0);
+
+ // Instructions should not be hoisted across exception handling boundaries.
+ return !Term->isExceptional();
+}
+
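
A typical caller is a hoisting or speculation transform that vets the
destination block before moving code. A hedged usage sketch (HoistBB and I
are illustrative, and HoistBB is assumed fully constructed so a terminator
exists):

    if (HoistBB->isLegalToHoistInto())
      I->moveBefore(HoistBB->getTerminator()); // never crosses an EH boundary
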
/// This splits a basic block into two at the specified
/// instruction. Note that all instructions BEFORE the specified iterator stay
/// as part of the original basic block, an unconditional branch is added to
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index 21d1996ef8514..4bd17257016d7 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -1,4 +1,4 @@
-//===-- ConstantRange.cpp - ConstantRange implementation ------------------===//
+//===- ConstantRange.cpp - ConstantRange implementation -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,12 +21,21 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
ConstantRange::ConstantRange(uint32_t BitWidth, bool Full)
@@ -170,7 +179,7 @@ ConstantRange
ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
const ConstantRange &Other,
unsigned NoWrapKind) {
- typedef OverflowingBinaryOperator OBO;
+ using OBO = OverflowingBinaryOperator;
// Computes the intersection of CR0 and CR1. It is different from
// intersectWith in that the ConstantRange returned will only contain elements
@@ -284,27 +293,14 @@ APInt ConstantRange::getUnsignedMin() const {
}
APInt ConstantRange::getSignedMax() const {
- if (!isWrappedSet()) {
- APInt UpperMinusOne = getUpper() - 1;
- if (getLower().sle(UpperMinusOne))
- return UpperMinusOne;
- return APInt::getSignedMaxValue(getBitWidth());
- }
- if (getLower().isNegative() == getUpper().isNegative())
+ if (isFullSet() || Lower.sgt(Upper))
return APInt::getSignedMaxValue(getBitWidth());
return getUpper() - 1;
}
APInt ConstantRange::getSignedMin() const {
- if (!isWrappedSet()) {
- if (getLower().sle(getUpper() - 1))
- return getLower();
+ if (isFullSet() || (Lower.sgt(Upper) && !getUpper().isMinSignedValue()))
return APInt::getSignedMinValue(getBitWidth());
- }
- if ((getUpper() - 1).slt(getLower())) {
- if (!getUpper().isMinSignedValue())
- return APInt::getSignedMinValue(getBitWidth());
- }
return getLower();
}
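
Both accessors now spot the signed-wrapped case directly: when Lower sgt
Upper, the set [Lower, Upper) straddles the signed boundary, so it always
contains INT_MAX and, unless Upper is exactly INT_MIN, INT_MIN as well. A
worked 8-bit example, assuming the usual ConstantRange headers:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    // [100, -100) at width 8 is the unsigned set {100..155}, i.e. the
    // signed set {100..127} U {-128..-101}.
    ConstantRange CR(APInt(8, 100), APInt(8, -100, /*isSigned=*/true));
    // 100 sgt -100, so Lower.sgt(Upper) holds:
    //   CR.getSignedMax() == 127    (INT8_MAX is in the set)
    //   CR.getSignedMin() == -128   (Upper != INT8_MIN, so the wrap hits it)
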
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 023a0b178a145..a79b00be4ffe8 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1995,8 +1995,8 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
InsertBefore) {
Op<0>() = S1;
Op<1>() = S2;
- init(iType);
setName(Name);
+ AssertOK();
}
BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
@@ -2008,17 +2008,17 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
InsertAtEnd) {
Op<0>() = S1;
Op<1>() = S2;
- init(iType);
setName(Name);
+ AssertOK();
}
-void BinaryOperator::init(BinaryOps iType) {
+void BinaryOperator::AssertOK() {
Value *LHS = getOperand(0), *RHS = getOperand(1);
(void)LHS; (void)RHS; // Silence warnings.
assert(LHS->getType() == RHS->getType() &&
"Binary operator operand types must match!");
#ifndef NDEBUG
- switch (iType) {
+ switch (getOpcode()) {
case Add: case Sub:
case Mul:
assert(getType() == LHS->getType() &&
@@ -2038,8 +2038,7 @@ void BinaryOperator::init(BinaryOps iType) {
case SDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ assert(getType()->isIntOrIntVectorTy() &&
"Incorrect operand type (not integer) for S/UDIV");
break;
case FDiv:
@@ -2052,8 +2051,7 @@ void BinaryOperator::init(BinaryOps iType) {
case SRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ assert(getType()->isIntOrIntVectorTy() &&
"Incorrect operand type (not integer) for S/UREM");
break;
case FRem:
@@ -2067,22 +2065,17 @@ void BinaryOperator::init(BinaryOps iType) {
case AShr:
assert(getType() == LHS->getType() &&
"Shift operation should return same type as operands!");
- assert((getType()->isIntegerTy() ||
- (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ assert(getType()->isIntOrIntVectorTy() &&
"Tried to create a shift operation on a non-integral type!");
break;
case And: case Or:
case Xor:
assert(getType() == LHS->getType() &&
"Logical operation should return same type as operands!");
- assert((getType()->isIntegerTy() ||
- (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ assert(getType()->isIntOrIntVectorTy() &&
"Tried to create a logical operation on a non-integral type!");
break;
- default:
- break;
+ default: llvm_unreachable("Invalid opcode provided");
}
#endif
}
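
The repeated assertion bodies collapse because Type::isIntOrIntVectorTy()
already folds the scalar and vector-of-integer cases into a single predicate;
for any llvm::Type *Ty (headers assumed) the two spellings agree:

    bool OldCheck = Ty->isIntegerTy() ||
                    (Ty->isVectorTy() &&
                     cast<VectorType>(Ty)->getElementType()->isIntegerTy());
    bool NewCheck = Ty->isIntOrIntVectorTy(); // same predicate, one call
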
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index 4a30d28c39138..c19e1be44fdc7 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -1,4 +1,4 @@
-//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
+//===- LLVMContextImpl.cpp - Implement LLVMContextImpl --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,17 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ManagedStatic.h"
-#include <algorithm>
+#include <cassert>
+#include <utility>
+
using namespace llvm;
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
- : TheTrueVal(nullptr), TheFalseVal(nullptr),
- VoidTy(C, Type::VoidTyID),
+ : VoidTy(C, Type::VoidTyID),
LabelTy(C, Type::LabelTyID),
HalfTy(C, Type::HalfTyID),
FloatTy(C, Type::FloatTyID),
@@ -39,17 +38,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
Int16Ty(C, 16),
Int32Ty(C, 32),
Int64Ty(C, 64),
- Int128Ty(C, 128) {
- InlineAsmDiagHandler = nullptr;
- InlineAsmDiagContext = nullptr;
- DiagnosticHandler = nullptr;
- DiagnosticContext = nullptr;
- RespectDiagnosticFilters = false;
- DiagnosticHotnessRequested = false;
- YieldCallback = nullptr;
- YieldOpaqueHandle = nullptr;
- NamedStructTypesUniqueID = 0;
-}
+ Int128Ty(C, 128) {}
LLVMContextImpl::~LLVMContextImpl() {
// NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -156,7 +145,6 @@ void LLVMContextImpl::dropTriviallyDeadConstantArrays() {
C->destroyConstant();
}
}
-
} while (Changed);
}
@@ -165,6 +153,7 @@ void Module::dropTriviallyDeadConstantArrays() {
}
namespace llvm {
+
/// \brief Make MDOperand transparent for hashing.
///
/// This overload of an implementation detail of the hashing library makes
@@ -179,7 +168,8 @@ namespace llvm {
/// does not cause MDOperand to be transparent. In particular, a bare pointer
/// doesn't get hashed before it's combined, whereas \a MDOperand would.
static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); }
-}
+
+} // end namespace llvm
unsigned MDNodeOpsKey::calculateHash(MDNode *N, unsigned Offset) {
unsigned Hash = hash_combine_range(N->op_begin() + Offset, N->op_end());
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 4ba974409a4fc..4147f71ad9d2c 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1,4 +1,4 @@
-//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
+//===- LLVMContextImpl.h - The LLVMContextImpl opaque class -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,11 +21,16 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
@@ -33,21 +38,26 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/TrackingMDRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/YAMLTraits.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
#include <vector>
namespace llvm {
-class ConstantInt;
class ConstantFP;
-class DiagnosticInfoOptimizationRemark;
-class DiagnosticInfoOptimizationRemarkMissed;
-class DiagnosticInfoOptimizationRemarkAnalysis;
-class GCStrategy;
-class LLVMContext;
+class ConstantInt;
class Type;
class Value;
+class ValueHandleBase;
struct DenseMapAPIntKeyInfo {
static inline APInt getEmptyKey() {
@@ -55,14 +65,17 @@ struct DenseMapAPIntKeyInfo {
V.U.VAL = 0;
return V;
}
+
static inline APInt getTombstoneKey() {
APInt V(nullptr, 0);
V.U.VAL = 1;
return V;
}
+
static unsigned getHashValue(const APInt &Key) {
return static_cast<unsigned>(hash_value(Key));
}
+
static bool isEqual(const APInt &LHS, const APInt &RHS) {
return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
}
@@ -71,9 +84,11 @@ struct DenseMapAPIntKeyInfo {
struct DenseMapAPFloatKeyInfo {
static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus(), 2); }
+
static unsigned getHashValue(const APFloat &Key) {
return static_cast<unsigned>(hash_value(Key));
}
+
static bool isEqual(const APFloat &LHS, const APFloat &RHS) {
return LHS.bitwiseIsEqual(RHS);
}
@@ -83,10 +98,13 @@ struct AnonStructTypeKeyInfo {
struct KeyTy {
ArrayRef<Type*> ETypes;
bool isPacked;
+
KeyTy(const ArrayRef<Type*>& E, bool P) :
ETypes(E), isPacked(P) {}
+
KeyTy(const StructType *ST)
: ETypes(ST->elements()), isPacked(ST->isPacked()) {}
+
bool operator==(const KeyTy& that) const {
if (isPacked != that.isPacked)
return false;
@@ -98,25 +116,31 @@ struct AnonStructTypeKeyInfo {
return !this->operator==(that);
}
};
+
static inline StructType* getEmptyKey() {
return DenseMapInfo<StructType*>::getEmptyKey();
}
+
static inline StructType* getTombstoneKey() {
return DenseMapInfo<StructType*>::getTombstoneKey();
}
+
static unsigned getHashValue(const KeyTy& Key) {
return hash_combine(hash_combine_range(Key.ETypes.begin(),
Key.ETypes.end()),
Key.isPacked);
}
+
static unsigned getHashValue(const StructType *ST) {
return getHashValue(KeyTy(ST));
}
+
static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
return LHS == KeyTy(RHS);
}
+
static bool isEqual(const StructType *LHS, const StructType *RHS) {
return LHS == RHS;
}
@@ -127,11 +151,13 @@ struct FunctionTypeKeyInfo {
const Type *ReturnType;
ArrayRef<Type*> Params;
bool isVarArg;
+
KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
ReturnType(R), Params(P), isVarArg(V) {}
KeyTy(const FunctionType *FT)
: ReturnType(FT->getReturnType()), Params(FT->params()),
isVarArg(FT->isVarArg()) {}
+
bool operator==(const KeyTy& that) const {
if (ReturnType != that.ReturnType)
return false;
@@ -145,26 +171,32 @@ struct FunctionTypeKeyInfo {
return !this->operator==(that);
}
};
+
static inline FunctionType* getEmptyKey() {
return DenseMapInfo<FunctionType*>::getEmptyKey();
}
+
static inline FunctionType* getTombstoneKey() {
return DenseMapInfo<FunctionType*>::getTombstoneKey();
}
+
static unsigned getHashValue(const KeyTy& Key) {
return hash_combine(Key.ReturnType,
hash_combine_range(Key.Params.begin(),
Key.Params.end()),
Key.isVarArg);
}
+
static unsigned getHashValue(const FunctionType *FT) {
return getHashValue(KeyTy(FT));
}
+
static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
return LHS == KeyTy(RHS);
}
+
static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) {
return LHS == RHS;
}
@@ -174,7 +206,6 @@ struct FunctionTypeKeyInfo {
class MDNodeOpsKey {
ArrayRef<Metadata *> RawOps;
ArrayRef<MDOperand> Ops;
-
unsigned Hash;
protected:
@@ -212,14 +243,15 @@ public:
};
template <class NodeTy> struct MDNodeKeyImpl;
-template <class NodeTy> struct MDNodeInfo;
/// Configuration point for MDNodeInfo::isEqual().
template <class NodeTy> struct MDNodeSubsetEqualImpl {
- typedef MDNodeKeyImpl<NodeTy> KeyTy;
+ using KeyTy = MDNodeKeyImpl<NodeTy>;
+
static bool isSubsetEqual(const KeyTy &LHS, const NodeTy *RHS) {
return false;
}
+
static bool isSubsetEqual(const NodeTy *LHS, const NodeTy *RHS) {
return false;
}
@@ -252,7 +284,6 @@ template <> struct MDNodeKeyImpl<DILocation> {
MDNodeKeyImpl(unsigned Line, unsigned Column, Metadata *Scope,
Metadata *InlinedAt)
: Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {}
-
MDNodeKeyImpl(const DILocation *L)
: Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()),
InlinedAt(L->getRawInlinedAt()) {}
@@ -261,6 +292,7 @@ template <> struct MDNodeKeyImpl<DILocation> {
return Line == RHS->getLine() && Column == RHS->getColumn() &&
Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt();
}
+
unsigned getHashValue() const {
return hash_combine(Line, Column, Scope, InlinedAt);
}
@@ -270,6 +302,7 @@ template <> struct MDNodeKeyImpl<DILocation> {
template <> struct MDNodeKeyImpl<GenericDINode> : MDNodeOpsKey {
unsigned Tag;
MDString *Header;
+
MDNodeKeyImpl(unsigned Tag, MDString *Header, ArrayRef<Metadata *> DwarfOps)
: MDNodeOpsKey(DwarfOps), Tag(Tag), Header(Header) {}
MDNodeKeyImpl(const GenericDINode *N)
@@ -299,6 +332,7 @@ template <> struct MDNodeKeyImpl<DISubrange> {
bool isKeyOf(const DISubrange *RHS) const {
return Count == RHS->getCount() && LowerBound == RHS->getLowerBound();
}
+
unsigned getHashValue() const { return hash_combine(Count, LowerBound); }
};
@@ -313,6 +347,7 @@ template <> struct MDNodeKeyImpl<DIEnumerator> {
bool isKeyOf(const DIEnumerator *RHS) const {
return Value == RHS->getValue() && Name == RHS->getRawName();
}
+
unsigned getHashValue() const { return hash_combine(Value, Name); }
};
@@ -337,6 +372,7 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
AlignInBits == RHS->getAlignInBits() &&
Encoding == RHS->getEncoding();
}
+
unsigned getHashValue() const {
return hash_combine(Tag, Name, SizeInBits, AlignInBits, Encoding);
}
@@ -384,6 +420,7 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
Flags == RHS->getFlags() &&
ExtraData == RHS->getRawExtraData();
}
+
unsigned getHashValue() const {
// If this is a member inside an ODR type, only hash the type and the name.
// Otherwise the hash will be stronger than
@@ -402,10 +439,12 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
};
template <> struct MDNodeSubsetEqualImpl<DIDerivedType> {
- typedef MDNodeKeyImpl<DIDerivedType> KeyTy;
+ using KeyTy = MDNodeKeyImpl<DIDerivedType>;
+
static bool isSubsetEqual(const KeyTy &LHS, const DIDerivedType *RHS) {
return isODRMember(LHS.Tag, LHS.Scope, LHS.Name, RHS);
}
+
static bool isSubsetEqual(const DIDerivedType *LHS, const DIDerivedType *RHS) {
return isODRMember(LHS->getTag(), LHS->getRawScope(), LHS->getRawName(),
RHS);
@@ -480,6 +519,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
TemplateParams == RHS->getRawTemplateParams() &&
Identifier == RHS->getRawIdentifier();
}
+
unsigned getHashValue() const {
// Intentionally computes the hash on a subset of the operands for
// performance reason. The subset has to be significant enough to avoid
@@ -504,6 +544,7 @@ template <> struct MDNodeKeyImpl<DISubroutineType> {
return Flags == RHS->getFlags() && CC == RHS->getCC() &&
TypeArray == RHS->getRawTypeArray();
}
+
unsigned getHashValue() const { return hash_combine(Flags, CC, TypeArray); }
};
@@ -527,6 +568,7 @@ template <> struct MDNodeKeyImpl<DIFile> {
CSKind == RHS->getChecksumKind() &&
Checksum == RHS->getRawChecksum();
}
+
unsigned getHashValue() const {
return hash_combine(Filename, Directory, CSKind, Checksum);
}
@@ -601,6 +643,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Variables == RHS->getRawVariables() &&
ThrownTypes == RHS->getRawThrownTypes();
}
+
unsigned getHashValue() const {
// If this is a declaration inside an ODR type, only hash the type and the
// name. Otherwise the hash will be stronger than
@@ -619,11 +662,13 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
};
template <> struct MDNodeSubsetEqualImpl<DISubprogram> {
- typedef MDNodeKeyImpl<DISubprogram> KeyTy;
+ using KeyTy = MDNodeKeyImpl<DISubprogram>;
+
static bool isSubsetEqual(const KeyTy &LHS, const DISubprogram *RHS) {
return isDeclarationOfODRMember(LHS.IsDefinition, LHS.Scope,
LHS.LinkageName, LHS.TemplateParams, RHS);
}
+
static bool isSubsetEqual(const DISubprogram *LHS, const DISubprogram *RHS) {
return isDeclarationOfODRMember(LHS->isDefinition(), LHS->getRawScope(),
LHS->getRawLinkageName(),
@@ -672,6 +717,7 @@ template <> struct MDNodeKeyImpl<DILexicalBlock> {
return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
Line == RHS->getLine() && Column == RHS->getColumn();
}
+
unsigned getHashValue() const {
return hash_combine(Scope, File, Line, Column);
}
@@ -692,6 +738,7 @@ template <> struct MDNodeKeyImpl<DILexicalBlockFile> {
return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
Discriminator == RHS->getDiscriminator();
}
+
unsigned getHashValue() const {
return hash_combine(Scope, File, Discriminator);
}
@@ -712,6 +759,7 @@ template <> struct MDNodeKeyImpl<DINamespace> {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
ExportSymbols == RHS->getExportSymbols();
}
+
unsigned getHashValue() const {
return hash_combine(Scope, Name);
}
@@ -723,6 +771,7 @@ template <> struct MDNodeKeyImpl<DIModule> {
MDString *ConfigurationMacros;
MDString *IncludePath;
MDString *ISysRoot;
+
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *ConfigurationMacros,
MDString *IncludePath, MDString *ISysRoot)
: Scope(Scope), Name(Name), ConfigurationMacros(ConfigurationMacros),
@@ -738,6 +787,7 @@ template <> struct MDNodeKeyImpl<DIModule> {
IncludePath == RHS->getRawIncludePath() &&
ISysRoot == RHS->getRawISysRoot();
}
+
unsigned getHashValue() const {
return hash_combine(Scope, Name,
ConfigurationMacros, IncludePath, ISysRoot);
@@ -755,6 +805,7 @@ template <> struct MDNodeKeyImpl<DITemplateTypeParameter> {
bool isKeyOf(const DITemplateTypeParameter *RHS) const {
return Name == RHS->getRawName() && Type == RHS->getRawType();
}
+
unsigned getHashValue() const { return hash_combine(Name, Type); }
};
@@ -774,6 +825,7 @@ template <> struct MDNodeKeyImpl<DITemplateValueParameter> {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
Type == RHS->getRawType() && Value == RHS->getValue();
}
+
unsigned getHashValue() const { return hash_combine(Tag, Name, Type, Value); }
};
@@ -816,6 +868,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
RHS->getRawStaticDataMemberDeclaration() &&
AlignInBits == RHS->getAlignInBits();
}
+
unsigned getHashValue() const {
// We do not use AlignInBits in hashing function here on purpose:
// in most cases this param for local variable is zero (for function param
@@ -856,6 +909,7 @@ template <> struct MDNodeKeyImpl<DILocalVariable> {
Type == RHS->getRawType() && Arg == RHS->getArg() &&
Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits();
}
+
unsigned getHashValue() const {
// We do not use AlignInBits in hashing function here on purpose:
// in most cases this param for local variable is zero (for function param
@@ -877,6 +931,7 @@ template <> struct MDNodeKeyImpl<DIExpression> {
bool isKeyOf(const DIExpression *RHS) const {
return Elements == RHS->getElements();
}
+
unsigned getHashValue() const {
return hash_combine_range(Elements.begin(), Elements.end());
}
@@ -895,6 +950,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariableExpression> {
return Variable == RHS->getRawVariable() &&
Expression == RHS->getRawExpression();
}
+
unsigned getHashValue() const { return hash_combine(Variable, Expression); }
};
@@ -923,6 +979,7 @@ template <> struct MDNodeKeyImpl<DIObjCProperty> {
SetterName == RHS->getRawSetterName() &&
Attributes == RHS->getAttributes() && Type == RHS->getRawType();
}
+
unsigned getHashValue() const {
return hash_combine(Name, File, Line, GetterName, SetterName, Attributes,
Type);
@@ -948,6 +1005,7 @@ template <> struct MDNodeKeyImpl<DIImportedEntity> {
Entity == RHS->getRawEntity() && Line == RHS->getLine() &&
Name == RHS->getRawName();
}
+
unsigned getHashValue() const {
return hash_combine(Tag, Scope, Entity, Line, Name);
}
@@ -969,6 +1027,7 @@ template <> struct MDNodeKeyImpl<DIMacro> {
return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
Name == RHS->getRawName() && Value == RHS->getRawValue();
}
+
unsigned getHashValue() const {
return hash_combine(MIType, Line, Name, Value);
}
@@ -991,6 +1050,7 @@ template <> struct MDNodeKeyImpl<DIMacroFile> {
return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
File == RHS->getRawFile() && Elements == RHS->getRawElements();
}
+
unsigned getHashValue() const {
return hash_combine(MIType, Line, File, Elements);
}
@@ -998,23 +1058,29 @@ template <> struct MDNodeKeyImpl<DIMacroFile> {
/// \brief DenseMapInfo for MDNode subclasses.
template <class NodeTy> struct MDNodeInfo {
- typedef MDNodeKeyImpl<NodeTy> KeyTy;
- typedef MDNodeSubsetEqualImpl<NodeTy> SubsetEqualTy;
+ using KeyTy = MDNodeKeyImpl<NodeTy>;
+ using SubsetEqualTy = MDNodeSubsetEqualImpl<NodeTy>;
+
static inline NodeTy *getEmptyKey() {
return DenseMapInfo<NodeTy *>::getEmptyKey();
}
+
static inline NodeTy *getTombstoneKey() {
return DenseMapInfo<NodeTy *>::getTombstoneKey();
}
+
static unsigned getHashValue(const KeyTy &Key) { return Key.getHashValue(); }
+
static unsigned getHashValue(const NodeTy *N) {
return KeyTy(N).getHashValue();
}
+
static bool isEqual(const KeyTy &LHS, const NodeTy *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
return SubsetEqualTy::isSubsetEqual(LHS, RHS) || LHS.isKeyOf(RHS);
}
+
static bool isEqual(const NodeTy *LHS, const NodeTy *RHS) {
if (LHS == RHS)
return true;
@@ -1024,7 +1090,7 @@ template <class NodeTy> struct MDNodeInfo {
}
};
-#define HANDLE_MDNODE_LEAF(CLASS) typedef MDNodeInfo<CLASS> CLASS##Info;
+#define HANDLE_MDNODE_LEAF(CLASS) using CLASS##Info = MDNodeInfo<CLASS>;
#include "llvm/IR/Metadata.def"
/// \brief Map-like storage for metadata attachments.
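
MDNodeInfo is an instance of the standard DenseMapInfo contract: two reserved
key values that real data can never take, a hash, and an equality test (here
routed through SubsetEqualTy so ODR-uniqued nodes compare by subset). A
minimal standalone model of that contract, with illustrative names:

    #include <cstdint>

    struct IdKeyInfo {
      static inline uint32_t getEmptyKey() { return ~0u; }         // reserved
      static inline uint32_t getTombstoneKey() { return ~0u - 1; } // reserved
      static unsigned getHashValue(uint32_t K) { return K * 37u; }
      static bool isEqual(uint32_t L, uint32_t R) { return L == R; }
    };
    // llvm::DenseMap<uint32_t, Metadata *, IdKeyInfo> consumes these hooks.
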
@@ -1097,24 +1163,24 @@ public:
/// will be automatically deleted if this context is deleted.
SmallPtrSet<Module*, 4> OwnedModules;
- LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
- void *InlineAsmDiagContext;
+ LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler = nullptr;
+ void *InlineAsmDiagContext = nullptr;
- LLVMContext::DiagnosticHandlerTy DiagnosticHandler;
- void *DiagnosticContext;
- bool RespectDiagnosticFilters;
- bool DiagnosticHotnessRequested;
+ LLVMContext::DiagnosticHandlerTy DiagnosticHandler = nullptr;
+ void *DiagnosticContext = nullptr;
+ bool RespectDiagnosticFilters = false;
+ bool DiagnosticHotnessRequested = false;
std::unique_ptr<yaml::Output> DiagnosticsOutputFile;
- LLVMContext::YieldCallbackTy YieldCallback;
- void *YieldOpaqueHandle;
+ LLVMContext::YieldCallbackTy YieldCallback = nullptr;
+ void *YieldOpaqueHandle = nullptr;
- typedef DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>
- IntMapTy;
+ using IntMapTy =
+ DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>;
IntMapTy IntConstants;
- typedef DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo>
- FPMapTy;
+ using FPMapTy =
+ DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo>;
FPMapTy FPConstants;
FoldingSet<AttributeImpl> AttrsSet;
@@ -1142,13 +1208,13 @@ public:
DenseMap<Type *, std::unique_ptr<ConstantAggregateZero>> CAZConstants;
- typedef ConstantUniqueMap<ConstantArray> ArrayConstantsTy;
+ using ArrayConstantsTy = ConstantUniqueMap<ConstantArray>;
ArrayConstantsTy ArrayConstants;
- typedef ConstantUniqueMap<ConstantStruct> StructConstantsTy;
+ using StructConstantsTy = ConstantUniqueMap<ConstantStruct>;
StructConstantsTy StructConstants;
- typedef ConstantUniqueMap<ConstantVector> VectorConstantsTy;
+ using VectorConstantsTy = ConstantUniqueMap<ConstantVector>;
VectorConstantsTy VectorConstants;
DenseMap<PointerType *, std::unique_ptr<ConstantPointerNull>> CPNConstants;
@@ -1163,8 +1229,8 @@ public:
ConstantUniqueMap<InlineAsm> InlineAsms;
- ConstantInt *TheTrueVal;
- ConstantInt *TheFalseVal;
+ ConstantInt *TheTrueVal = nullptr;
+ ConstantInt *TheFalseVal = nullptr;
std::unique_ptr<ConstantTokenNone> TheNoneToken;
@@ -1172,7 +1238,6 @@ public:
Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
-
/// TypeAllocator - All dynamically allocated types are allocated from this.
/// They live forever until the context is torn down.
@@ -1180,23 +1245,22 @@ public:
DenseMap<unsigned, IntegerType*> IntegerTypes;
- typedef DenseSet<FunctionType *, FunctionTypeKeyInfo> FunctionTypeSet;
+ using FunctionTypeSet = DenseSet<FunctionType *, FunctionTypeKeyInfo>;
FunctionTypeSet FunctionTypes;
- typedef DenseSet<StructType *, AnonStructTypeKeyInfo> StructTypeSet;
+ using StructTypeSet = DenseSet<StructType *, AnonStructTypeKeyInfo>;
StructTypeSet AnonStructTypes;
StringMap<StructType*> NamedStructTypes;
- unsigned NamedStructTypesUniqueID;
+ unsigned NamedStructTypesUniqueID = 0;
DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
-
/// ValueHandles - This map keeps track of all of the value handles that are
/// watching a Value*. The Value::HasValueHandle bit is used to know
/// whether or not a value has an entry in this map.
- typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
+ using ValueHandlesTy = DenseMap<Value *, ValueHandleBase *>;
ValueHandlesTy ValueHandles;
/// CustomMDKindNames - Map to hold the metadata string to ID mapping.
@@ -1254,6 +1318,6 @@ public:
OptBisect &getOptBisect();
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_IR_LLVMCONTEXTIMPL_H
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 92e5798dcf214..ac02ff76c8436 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Metadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
#include "SymbolTableListTraitsImpl.h"
@@ -27,6 +26,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -39,6 +39,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/IR/Type.h"
@@ -53,6 +54,7 @@
#include <cstdint>
#include <iterator>
#include <tuple>
+#include <type_traits>
#include <utility>
#include <vector>
@@ -233,7 +235,7 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
return;
// Copy out uses since UseMap will get touched below.
- typedef std::pair<void *, std::pair<OwnerTy, uint64_t>> UseTy;
+ using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
std::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) {
return L.second.second < R.second.second;
@@ -286,7 +288,7 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
}
// Copy out uses since UseMap could get touched below.
- typedef std::pair<void *, std::pair<OwnerTy, uint64_t>> UseTy;
+ using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
std::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) {
return L.second.second < R.second.second;
@@ -758,8 +760,8 @@ static T *uniquifyImpl(T *N, DenseSet<T *, InfoT> &Store) {
}
template <class NodeTy> struct MDNode::HasCachedHash {
- typedef char Yes[1];
- typedef char No[2];
+ using Yes = char[1];
+ using No = char[2];
template <class U, U Val> struct SFINAE {};
template <class U>
@@ -1484,7 +1486,7 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) {
addMetadata(
LLVMContext::MD_type,
*MDTuple::get(getContext(),
- {ConstantAsMetadata::get(llvm::ConstantInt::get(
+ {ConstantAsMetadata::get(ConstantInt::get(
Type::getInt64Ty(getContext()), Offset)),
TypeID}));
}
diff --git a/lib/IR/Statepoint.cpp b/lib/IR/Statepoint.cpp
index 8c3f0f208cc67..18efee2177c34 100644
--- a/lib/IR/Statepoint.cpp
+++ b/lib/IR/Statepoint.cpp
@@ -44,10 +44,22 @@ bool llvm::isGCRelocate(ImmutableCallSite CS) {
return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction());
}
+bool llvm::isGCRelocate(const Value *V) {
+ if (auto CS = ImmutableCallSite(V))
+ return isGCRelocate(CS);
+ return false;
+}
+
bool llvm::isGCResult(ImmutableCallSite CS) {
return CS.getInstruction() && isa<GCResultInst>(CS.getInstruction());
}
+bool llvm::isGCResult(const Value *V) {
+ if (auto CS = ImmutableCallSite(V))
+ return isGCResult(CS);
+ return false;
+}
+
bool llvm::isStatepointDirectiveAttr(Attribute Attr) {
return Attr.hasAttribute("statepoint-id") ||
Attr.hasAttribute("statepoint-num-patch-bytes");
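
The new Value*-based overloads spare callers the explicit ImmutableCallSite
conversion; a hedged usage sketch where V is any llvm::Value * and the
process* helpers are illustrative:

    if (isGCRelocate(V))          // was: isGCRelocate(ImmutableCallSite(V))
      processRelocate(cast<GCRelocateInst>(V));
    else if (isGCResult(V))
      processResult(cast<GCResultInst>(V));
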
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index e4094d44867b2..1efd481b246c8 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -32,7 +32,6 @@
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
-#include "llvm/Linker/Linker.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/CachePruning.h"
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index a86fd383003da..562f136a3ce2b 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -45,13 +45,13 @@ add_llvm_library(LLVMMC
MCWasmObjectTargetWriter.cpp
MCWasmStreamer.cpp
MCWin64EH.cpp
+ MCWinCOFFStreamer.cpp
MCWinEH.cpp
MachObjectWriter.cpp
StringTableBuilder.cpp
SubtargetFeature.cpp
WasmObjectWriter.cpp
WinCOFFObjectWriter.cpp
- WinCOFFStreamer.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/MC
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 4d139132df462..30f357826805a 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -633,9 +633,6 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
MCContext &Ctx = Asm.getContext();
if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
- assert(RefB->getKind() == MCSymbolRefExpr::VK_None &&
- "Should not have constructed this");
-
// Let A, B and C being the components of Target and R be the location of
// the fixup. If the fixup is not pcrel, we want to compute (A - B + C).
// If it is pcrel, we want to compute (A - B + C - R).
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 53cdaac3aa54b..92c5da0e9fef9 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -193,14 +193,23 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
// FIXME: This code has some duplication with recordRelocation. We should
// probably merge the two into a single callback that tries to evaluate a
// fixup and records a relocation if one is needed.
+
+ // On error claim to have completely evaluated the fixup, to prevent any
+ // further processing from being done.
const MCExpr *Expr = Fixup.getValue();
+ MCContext &Ctx = getContext();
+ Value = 0;
if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) {
- getContext().reportError(Fixup.getLoc(), "expected relocatable expression");
- // Claim to have completely evaluated the fixup, to prevent any further
- // processing from being done.
- Value = 0;
+ Ctx.reportError(Fixup.getLoc(), "expected relocatable expression");
return true;
}
+ if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+ if (RefB->getKind() != MCSymbolRefExpr::VK_None) {
+ Ctx.reportError(Fixup.getLoc(),
+ "unsupported subtraction of qualified symbol");
+ return true;
+ }
+ }
bool IsPCRel = Backend.getFixupKindInfo(
Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
@@ -254,8 +263,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
// Let the backend adjust the fixup value if necessary, including whether
// we need a relocation.
- Backend.processFixupValue(*this, Layout, Fixup, DF, Target, Value,
- IsResolved);
+ Backend.processFixupValue(*this, Fixup, Target, IsResolved);
return IsResolved;
}
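
The added guard rejects fixups whose subtrahend carries a symbol modifier, a
difference no backend can relocate, and replaces the assertion deleted from
ELFObjectWriter::recordRelocation above with a proper source-located
diagnostic. A standalone model of the test (MCValue naming, llvm/MC headers
assumed):

    bool isSupportedDifference(const MCValue &Target) {
      const MCSymbolRefExpr *RefB = Target.getSymB(); // B in (A - B + C)
      return !RefB || RefB->getKind() == MCSymbolRefExpr::VK_None;
    }
    // e.g. ".long a - b@GOT" fails this test: the subtrahend b is qualified.
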
@@ -639,9 +647,9 @@ void MCAssembler::writeSectionData(const MCSection *Sec,
Layout.getSectionAddressSize(Sec));
}
-std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout,
- MCFragment &F,
- const MCFixup &Fixup) {
+std::tuple<MCValue, uint64_t, bool>
+MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F,
+ const MCFixup &Fixup) {
// Evaluate the fixup.
MCValue Target;
uint64_t FixedValue;
@@ -654,7 +662,7 @@ std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout,
getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel,
FixedValue);
}
- return std::make_pair(FixedValue, IsPCRel);
+ return std::make_tuple(Target, FixedValue, IsPCRel);
}
void MCAssembler::layout(MCAsmLayout &Layout) {
@@ -731,9 +739,11 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
for (const MCFixup &Fixup : Fixups) {
uint64_t FixedValue;
bool IsPCRel;
- std::tie(FixedValue, IsPCRel) = handleFixup(Layout, Frag, Fixup);
- getBackend().applyFixup(Fixup, Contents.data(), Contents.size(),
- FixedValue, IsPCRel, getContext());
+ MCValue Target;
+ std::tie(Target, FixedValue, IsPCRel) =
+ handleFixup(Layout, Frag, Fixup);
+ getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue,
+ IsPCRel);
}
}
}
diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp
index f3d0eb55eecd4..6e0249377a899 100644
--- a/lib/MC/MCFragment.cpp
+++ b/lib/MC/MCFragment.cpp
@@ -307,7 +307,7 @@ raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
} // end namespace llvm
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void MCFragment::dump() {
+LLVM_DUMP_METHOD void MCFragment::dump() const {
raw_ostream &OS = errs();
OS << "<";
@@ -328,9 +328,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() {
case MCFragment::FT_Dummy: OS << "MCDummyFragment"; break;
}
- OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+ OS << "<MCFragment " << (const void*) this << " LayoutOrder:" << LayoutOrder
<< " Offset:" << Offset
- << " HasInstructions:" << hasInstructions()
+ << " HasInstructions:" << hasInstructions()
<< " BundlePadding:" << static_cast<unsigned>(getBundlePadding()) << ">";
switch (getKind()) {
@@ -382,7 +382,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() {
}
case MCFragment::FT_Fill: {
const MCFillFragment *FF = cast<MCFillFragment>(this);
- OS << " Value:" << FF->getValue() << " Size:" << FF->getSize();
+ OS << " Value:" << static_cast<unsigned>(FF->getValue())
+ << " Size:" << FF->getSize();
break;
}
case MCFragment::FT_Relaxable: {
@@ -395,7 +396,8 @@ LLVM_DUMP_METHOD void MCFragment::dump() {
case MCFragment::FT_Org: {
const MCOrgFragment *OF = cast<MCOrgFragment>(this);
OS << "\n ";
- OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+ OS << " Offset:" << OF->getOffset()
+ << " Value:" << static_cast<unsigned>(OF->getValue());
break;
}
case MCFragment::FT_Dwarf: {
@@ -445,19 +447,19 @@ LLVM_DUMP_METHOD void MCFragment::dump() {
OS << ">";
}
-LLVM_DUMP_METHOD void MCAssembler::dump() {
+LLVM_DUMP_METHOD void MCAssembler::dump() const {
raw_ostream &OS = errs();
OS << "<MCAssembler\n";
OS << " Sections:[\n ";
- for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if (it != begin()) OS << ",\n ";
it->dump();
}
OS << "],\n";
OS << " Symbols:[";
- for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
+ for (const_symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
if (it != symbol_begin()) OS << ",\n ";
OS << "(";
it->dump();
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index b961cb3968e86..d141dd6627c46 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -86,7 +86,7 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void MCSection::dump() {
+LLVM_DUMP_METHOD void MCSection::dump() const {
raw_ostream &OS = errs();
OS << "<MCSection";
diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp
index 59b62b8d37c30..02fa070f0c57d 100644
--- a/lib/MC/MCWasmStreamer.cpp
+++ b/lib/MC/MCWasmStreamer.cpp
@@ -98,18 +98,30 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_WeakDefAutoPrivate:
case MCSA_Invalid:
case MCSA_IndirectSymbol:
+ case MCSA_Hidden:
return false;
+
+ case MCSA_Weak:
+ case MCSA_WeakReference:
+ Symbol->setWeak(true);
+ Symbol->setExternal(true);
+ break;
+
case MCSA_Global:
Symbol->setExternal(true);
break;
+
case MCSA_ELF_TypeFunction:
Symbol->setIsFunction(true);
break;
+
case MCSA_ELF_TypeObject:
Symbol->setIsFunction(false);
break;
+
default:
// unrecognized directive
+ llvm_unreachable("unexpected MCSymbolAttr");
return false;
}
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp
index b4d0d7a87f1d3..bf341bb1f4511 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/MCWinCOFFStreamer.cpp
@@ -1,4 +1,4 @@
-//===- llvm/MC/WinCOFFStreamer.cpp ----------------------------------------===//
+//===- llvm/MC/MCWinCOFFStreamer.cpp --------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -190,7 +190,8 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
}
-void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) {
+ visitUsedSymbol(*Symbol);
MCDataFragment *DF = getOrCreateDataFragment();
const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext());
MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_2);
@@ -198,8 +199,9 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
DF->getContents().resize(DF->getContents().size() + 2, 0);
}
-void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol,
+void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol,
uint64_t Offset) {
+ visitUsedSymbol(*Symbol);
MCDataFragment *DF = getOrCreateDataFragment();
// Create Symbol A for the relocation relative reference.
const MCExpr *MCE = MCSymbolRefExpr::create(Symbol, getContext());
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index db304c027f991..45534ba182123 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -129,15 +129,15 @@ struct WasmGlobal {
// Information about a single relocation.
struct WasmRelocationEntry {
- uint64_t Offset; // Where is the relocation.
- const MCSymbolWasm *Symbol; // The symbol to relocate with.
- int64_t Addend; // A value to add to the symbol.
- unsigned Type; // The type of the relocation.
- MCSectionWasm *FixupSection;// The section the relocation is targeting.
+ uint64_t Offset; // Where is the relocation.
+ const MCSymbolWasm *Symbol; // The symbol to relocate with.
+ int64_t Addend; // A value to add to the symbol.
+ unsigned Type; // The type of the relocation.
+ const MCSectionWasm *FixupSection; // The section the relocation is targeting.
WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
int64_t Addend, unsigned Type,
- MCSectionWasm *FixupSection)
+ const MCSectionWasm *FixupSection)
: Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
FixupSection(FixupSection) {}
@@ -156,9 +156,19 @@ struct WasmRelocationEntry {
Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend
<< ", Type=" << Type << ", FixupSection=" << FixupSection;
}
- void dump() const { print(errs()); }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
};
+#if !defined(NDEBUG)
+raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
+ Rel.print(OS);
+ return OS;
+}
+#endif
+
class WasmObjectWriter : public MCObjectWriter {
/// Helper struct for containing some precomputed information on symbols.
struct WasmSymbolData {
@@ -229,6 +239,11 @@ private:
void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void writeString(const StringRef Str) {
+ encodeULEB128(Str.size(), getStream());
+ writeBytes(Str);
+ }
+
void writeValueType(wasm::ValType Ty) {
encodeSLEB128(int32_t(Ty), getStream());
}
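
writeString() gives wasm names their standard on-disk shape: a ULEB128 byte
count followed by the raw bytes; the custom-section path below reuses it. A
self-contained sketch of the length prefix (the helper name is illustrative
of what encodeULEB128 does):

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> uleb128(uint64_t V) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80;  // continuation bit on all but the last byte
        Out.push_back(Byte);
      } while (V);
      return Out;
    }
    // uleb128(144) yields {0x90, 0x01}; a 144-byte string starts with those.
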
@@ -250,7 +265,8 @@ private:
uint32_t NumFuncImports);
void writeCodeRelocSection();
void writeDataRelocSection(uint64_t DataSectionHeaderSize);
- void writeLinkingMetaDataSection(bool HasStackPointer,
+ void writeLinkingMetaDataSection(ArrayRef<StringRef> WeakSymbols,
+ bool HasStackPointer,
uint32_t StackPointerGlobal);
void applyRelocations(ArrayRef<WasmRelocationEntry> Relocations,
@@ -282,6 +298,7 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section,
assert((Name != nullptr) == (SectionId == wasm::WASM_SEC_CUSTOM) &&
"Only custom sections can have names");
+ DEBUG(dbgs() << "startSection " << SectionId << ": " << Name << "\n");
encodeULEB128(SectionId, getStream());
Section.SizeOffset = getStream().tell();
@@ -295,8 +312,8 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section,
// Custom sections in wasm also have a string identifier.
if (SectionId == wasm::WASM_SEC_CUSTOM) {
- encodeULEB128(strlen(Name), getStream());
- writeBytes(Name);
+ assert(Name);
+ writeString(StringRef(Name));
}
}
@@ -307,6 +324,7 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
if (uint32_t(Size) != Size)
report_fatal_error("section size does not fit in a uint32_t");
+ DEBUG(dbgs() << "endSection size=" << Size << "\n");
unsigned Padding = PaddingFor5ByteULEB128(Size);
// Write the final section size to the payload_len field, which follows
@@ -332,7 +350,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
bool &IsPCRel, uint64_t &FixedValue) {
- MCSectionWasm &FixupSection = cast<MCSectionWasm>(*Fragment->getParent());
+ const auto &FixupSection = cast<MCSectionWasm>(*Fragment->getParent());
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
MCContext &Ctx = Asm.getContext();
@@ -406,9 +424,12 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
}
assert(!IsPCRel);
+ assert(SymA);
+
unsigned Type = getRelocType(Target, Fixup);
WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
+ DEBUG(dbgs() << "WasmReloc: " << Rec << "\n");
if (FixupSection.hasInstructions())
CodeRelocations.push_back(Rec);
@@ -453,11 +474,10 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) {
const MCSymbolWasm *Sym = RelEntry.Symbol;
// For undefined symbols, use a hopefully invalid value.
- if (!Sym->isDefined(false))
+ if (!Sym->isDefined(/*SetUsed=*/false))
return UINT32_MAX;
- MCSectionWasm &Section =
- cast<MCSectionWasm>(RelEntry.Symbol->getSection(false));
+ const auto &Section = cast<MCSectionWasm>(RelEntry.Symbol->getSection(false));
uint64_t Address = Section.getSectionOffset() + RelEntry.Addend;
// Ignore overflow. LLVM allows address arithmetic to silently wrap.
@@ -471,16 +491,23 @@ uint32_t WasmObjectWriter::getRelocationIndexValue(
switch (RelEntry.Type) {
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
- assert(IndirectSymbolIndices.count(RelEntry.Symbol));
+ if (!IndirectSymbolIndices.count(RelEntry.Symbol))
+ report_fatal_error("symbol not found table index space:" +
+ RelEntry.Symbol->getName());
return IndirectSymbolIndices[RelEntry.Symbol];
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
- assert(SymbolIndices.count(RelEntry.Symbol));
+ if (!SymbolIndices.count(RelEntry.Symbol))
+ report_fatal_error("symbol not found function/global index space:" +
+ RelEntry.Symbol->getName());
return SymbolIndices[RelEntry.Symbol];
case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
- assert(TypeIndices.count(RelEntry.Symbol));
+ if (!TypeIndices.count(RelEntry.Symbol))
+ report_fatal_error("symbol not found in type index space:" +
+ RelEntry.Symbol->getName());
return TypeIndices[RelEntry.Symbol];
default:
llvm_unreachable("invalid relocation type");
@@ -497,10 +524,12 @@ void WasmObjectWriter::applyRelocations(
RelEntry.FixupSection->getSectionOffset() +
RelEntry.Offset;
+ DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");
switch (RelEntry.Type) {
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: {
+ case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: {
uint32_t Index = getRelocationIndexValue(RelEntry);
WritePatchableSLEB(Stream, Index, Offset);
break;
@@ -526,7 +555,7 @@ void WasmObjectWriter::applyRelocations(
break;
}
default:
- llvm_unreachable("unsupported relocation type");
+ llvm_unreachable("invalid relocation type");
}
}
}
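
applyRelocations can rewrite these immediates in place because the writer emits them as fixed-width, 5-byte padded LEB128 (see PaddingFor5ByteULEB128 above). A self-contained sketch of that rewrite; WritePatchableSLEB, used for the index cases, differs only in sign handling:

    #include <cstdint>

    // Overwrite a 5-byte padded ULEB128 field with Value. Because every
    // relocatable immediate occupies exactly five bytes, no section bytes
    // need to shift when the final value is patched in.
    static void patchPaddedULEB128(uint8_t *Field, uint32_t Value) {
      for (int I = 0; I < 5; ++I) {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (I != 4)
          Byte |= 0x80; // continuation bit on all but the final byte
        Field[I] = Byte;
      }
    }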
@@ -573,6 +602,7 @@ void WasmObjectWriter::writeTypeSection(
endSection(Section);
}
+
void WasmObjectWriter::writeImportSection(
const SmallVector<WasmImport, 4> &Imports) {
if (Imports.empty())
@@ -583,13 +613,8 @@ void WasmObjectWriter::writeImportSection(
encodeULEB128(Imports.size(), getStream());
for (const WasmImport &Import : Imports) {
- StringRef ModuleName = Import.ModuleName;
- encodeULEB128(ModuleName.size(), getStream());
- writeBytes(ModuleName);
-
- StringRef FieldName = Import.FieldName;
- encodeULEB128(FieldName.size(), getStream());
- writeBytes(FieldName);
+ writeString(Import.ModuleName);
+ writeString(Import.FieldName);
encodeULEB128(Import.Kind, getStream());
@@ -697,11 +722,8 @@ void WasmObjectWriter::writeExportSection(
encodeULEB128(Exports.size(), getStream());
for (const WasmExport &Export : Exports) {
- encodeULEB128(Export.FieldName.size(), getStream());
- writeBytes(Export.FieldName);
-
+ writeString(Export.FieldName);
encodeSLEB128(Export.Kind, getStream());
-
encodeULEB128(Export.Index, getStream());
}
@@ -743,17 +765,7 @@ void WasmObjectWriter::writeCodeSection(
encodeULEB128(Functions.size(), getStream());
for (const WasmFunction &Func : Functions) {
- MCSectionWasm &FuncSection =
- static_cast<MCSectionWasm &>(Func.Sym->getSection());
-
- if (Func.Sym->isVariable())
- report_fatal_error("weak symbols not supported yet");
-
- if (Func.Sym->getOffset() != 0)
- report_fatal_error("function sections must contain one function each");
-
- if (!Func.Sym->getSize())
- report_fatal_error("function symbols must have a size set with .size");
+ auto &FuncSection = static_cast<MCSectionWasm &>(Func.Sym->getSection());
int64_t Size = 0;
if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
@@ -761,8 +773,7 @@ void WasmObjectWriter::writeCodeSection(
encodeULEB128(Size, getStream());
- FuncSection.setSectionOffset(getStream().tell() -
- Section.ContentsOffset);
+ FuncSection.setSectionOffset(getStream().tell() - Section.ContentsOffset);
Asm.writeSectionData(&FuncSection, Layout);
}
@@ -815,15 +826,13 @@ void WasmObjectWriter::writeNameSection(
for (const WasmImport &Import : Imports) {
if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
encodeULEB128(Index, getStream());
- encodeULEB128(Import.FieldName.size(), getStream());
- writeBytes(Import.FieldName);
+ writeString(Import.FieldName);
++Index;
}
}
for (const WasmFunction &Func : Functions) {
encodeULEB128(Index, getStream());
- encodeULEB128(Func.Sym->getName().size(), getStream());
- writeBytes(Func.Sym->getName());
+ writeString(Func.Sym->getName());
++Index;
}
@@ -868,22 +877,37 @@ void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) {
}
void WasmObjectWriter::writeLinkingMetaDataSection(
- bool HasStackPointer, uint32_t StackPointerGlobal) {
- if (!HasStackPointer)
+ ArrayRef<StringRef> WeakSymbols, bool HasStackPointer,
+ uint32_t StackPointerGlobal) {
+ if (!HasStackPointer && WeakSymbols.empty())
return;
+
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
+ SectionBookkeeping SubSection;
- encodeULEB128(1, getStream()); // count
+ if (HasStackPointer) {
+ startSection(SubSection, wasm::WASM_STACK_POINTER);
+ encodeULEB128(StackPointerGlobal, getStream()); // id
+ endSection(SubSection);
+ }
- encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type
- encodeULEB128(StackPointerGlobal, getStream()); // id
+  if (!WeakSymbols.empty()) {
+    startSection(SubSection, wasm::WASM_SYMBOL_INFO);
+    encodeULEB128(WeakSymbols.size(), getStream());
+    for (const StringRef Export : WeakSymbols) {
+ writeString(Export);
+ encodeULEB128(wasm::WASM_SYMBOL_FLAG_WEAK, getStream());
+ }
+ endSection(SubSection);
+ }
endSection(Section);
}
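
The "linking" custom section is now a sequence of subsections, each framed as a varint7 type, a ULEB128 payload size, and the payload itself; parseLinkingSection in lib/Object/WasmObjectFile.cpp further down mirrors this framing. A reader-side sketch (Ptr, End, and the readVar* helpers are assumed from that file, where they advance Ptr as they decode):

    while (Ptr < End) {
      uint8_t Type = readVarint7(Ptr);    // e.g. WASM_STACK_POINTER,
      uint32_t Size = readVaruint32(Ptr); //      WASM_SYMBOL_INFO
      const uint8_t *SubSectionEnd = Ptr + Size;
      // ...decode payloads for known types here...
      Ptr = SubSectionEnd; // unknown subsections are skipped wholesale
    }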
void WasmObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ DEBUG(dbgs() << "WasmObjectWriter::writeObject\n");
MCContext &Ctx = Asm.getContext();
wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32;
@@ -894,6 +918,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
SmallVector<WasmGlobal, 4> Globals;
SmallVector<WasmImport, 4> Imports;
SmallVector<WasmExport, 4> Exports;
+ SmallVector<StringRef, 4> WeakSymbols;
SmallPtrSet<const MCSymbolWasm *, 4> IsAddressTaken;
unsigned NumFuncImports = 0;
unsigned NumGlobalImports = 0;
@@ -902,7 +927,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
bool HasStackPointer = false;
// Populate the IsAddressTaken set.
- for (WasmRelocationEntry RelEntry : CodeRelocations) {
+ for (const WasmRelocationEntry &RelEntry : CodeRelocations) {
switch (RelEntry.Type) {
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
@@ -912,7 +937,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
break;
}
}
- for (WasmRelocationEntry RelEntry : DataRelocations) {
+ for (const WasmRelocationEntry &RelEntry : DataRelocations) {
switch (RelEntry.Type) {
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
@@ -975,7 +1000,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
const MCFragment &Frag = *GlobalVars->begin();
if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
report_fatal_error("only data supported in .global_variables");
- const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag);
+ const auto &DataFrag = cast<MCDataFragment>(Frag);
if (!DataFrag.getFixups().empty())
report_fatal_error("fixups not supported in .global_variables");
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
@@ -1031,7 +1056,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
const MCFragment &Frag = *StackPtr->begin();
if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
report_fatal_error("only data supported in .stack_pointer");
- const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag);
+ const auto &DataFrag = cast<MCDataFragment>(Frag);
if (!DataFrag.getFixups().empty())
report_fatal_error("fixups not supported in .stack_pointer");
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
@@ -1041,14 +1066,30 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
StackPointerGlobal = NumGlobalImports + *(const int32_t *)Contents.data();
}
- // Handle defined symbols.
+ // Handle regular defined and undefined symbols.
for (const MCSymbol &S : Asm.symbols()) {
// Ignore unnamed temporary symbols, which aren't ever exported, imported,
// or used in relocations.
if (S.isTemporary() && S.getName().empty())
continue;
+
+  // Variable references (weak references) are handled in a second pass.
+ if (S.isVariable())
+ continue;
+
const auto &WS = static_cast<const MCSymbolWasm &>(S);
+ DEBUG(dbgs() << "MCSymbol: '" << S << "'"
+ << " isDefined=" << S.isDefined() << " isExternal="
+ << S.isExternal() << " isTemporary=" << S.isTemporary()
+ << " isFunction=" << WS.isFunction()
+ << " isWeak=" << WS.isWeak()
+ << " isVariable=" << WS.isVariable() << "\n");
+
+ if (WS.isWeak())
+ WeakSymbols.push_back(WS.getName());
+
unsigned Index;
+
if (WS.isFunction()) {
// Prepare the function's type, if we haven't seen it yet.
WasmFunctionType F;
@@ -1062,6 +1103,14 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
int32_t Type = Pair.first->second;
if (WS.isDefined(/*SetUsed=*/false)) {
+ if (WS.getOffset() != 0)
+ report_fatal_error(
+ "function sections must contain one function each");
+
+ if (WS.getSize() == 0)
+ report_fatal_error(
+ "function symbols must have a size set with .size");
+
// A definition. Take the next available index.
Index = NumFuncImports + Functions.size();
@@ -1072,6 +1121,9 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
SymbolIndices[&WS] = Index;
Functions.push_back(Func);
} else {
+    // There should be no such thing as a weak undefined symbol.
+ assert(!WS.isVariable());
+
// An import; the index was assigned above.
Index = SymbolIndices.find(&WS)->second;
}
@@ -1085,86 +1137,108 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
if (WS.isTemporary() && !WS.getSize())
continue;
- if (WS.isDefined(false)) {
- if (WS.getOffset() != 0)
- report_fatal_error("data sections must contain one variable each: " +
- WS.getName());
- if (!WS.getSize())
- report_fatal_error("data symbols must have a size set with .size: " +
- WS.getName());
-
- int64_t Size = 0;
- if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
- report_fatal_error(".size expression must be evaluatable");
-
- MCSectionWasm &DataSection =
- static_cast<MCSectionWasm &>(WS.getSection());
-
- if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection))
- report_fatal_error("data sections must contain at most one variable");
-
- DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment()));
-
- DataSection.setSectionOffset(DataBytes.size());
-
- for (MCSection::iterator I = DataSection.begin(), E = DataSection.end();
- I != E; ++I) {
- const MCFragment &Frag = *I;
- if (Frag.hasInstructions())
- report_fatal_error("only data supported in data sections");
-
- if (const MCAlignFragment *Align = dyn_cast<MCAlignFragment>(&Frag)) {
- if (Align->getValueSize() != 1)
- report_fatal_error("only byte values supported for alignment");
- // If nops are requested, use zeros, as this is the data section.
- uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue();
- uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(),
- Align->getAlignment()),
- DataBytes.size() +
- Align->getMaxBytesToEmit());
- DataBytes.resize(Size, Value);
- } else if (const MCFillFragment *Fill =
- dyn_cast<MCFillFragment>(&Frag)) {
- DataBytes.insert(DataBytes.end(), Size, Fill->getValue());
- } else {
- const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag);
- const SmallVectorImpl<char> &Contents = DataFrag.getContents();
-
- DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
- }
- }
-
- // For each global, prepare a corresponding wasm global holding its
- // address. For externals these will also be named exports.
- Index = NumGlobalImports + Globals.size();
+ if (!WS.isDefined(/*SetUsed=*/false))
+ continue;
- WasmGlobal Global;
- Global.Type = PtrType;
- Global.IsMutable = false;
- Global.HasImport = false;
- Global.InitialValue = DataSection.getSectionOffset();
- Global.ImportIndex = 0;
- SymbolIndices[&WS] = Index;
- Globals.push_back(Global);
+ if (WS.getOffset() != 0)
+ report_fatal_error("data sections must contain one variable each: " +
+ WS.getName());
+ if (!WS.getSize())
+ report_fatal_error("data symbols must have a size set with .size: " +
+ WS.getName());
+
+ int64_t Size = 0;
+ if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+
+ auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
+
+ if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection))
+ report_fatal_error("data sections must contain at most one variable");
+
+ DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment()));
+
+ DataSection.setSectionOffset(DataBytes.size());
+
+ for (const MCFragment &Frag : DataSection) {
+ if (Frag.hasInstructions())
+ report_fatal_error("only data supported in data sections");
+
+ if (auto *Align = dyn_cast<MCAlignFragment>(&Frag)) {
+ if (Align->getValueSize() != 1)
+ report_fatal_error("only byte values supported for alignment");
+ // If nops are requested, use zeros, as this is the data section.
+ uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue();
+ uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(),
+ Align->getAlignment()),
+ DataBytes.size() +
+ Align->getMaxBytesToEmit());
+ DataBytes.resize(Size, Value);
+ } else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) {
+ DataBytes.insert(DataBytes.end(), Size, Fill->getValue());
+ } else {
+ const auto &DataFrag = cast<MCDataFragment>(Frag);
+ const SmallVectorImpl<char> &Contents = DataFrag.getContents();
+
+ DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
+ }
}
+
+ // For each global, prepare a corresponding wasm global holding its
+ // address. For externals these will also be named exports.
+ Index = NumGlobalImports + Globals.size();
+
+ WasmGlobal Global;
+ Global.Type = PtrType;
+ Global.IsMutable = false;
+ Global.HasImport = false;
+ Global.InitialValue = DataSection.getSectionOffset();
+ Global.ImportIndex = 0;
+ SymbolIndices[&WS] = Index;
+ Globals.push_back(Global);
}
// If the symbol is visible outside this translation unit, export it.
- if (WS.isExternal()) {
- assert(WS.isDefined(false));
+ if (WS.isExternal() && WS.isDefined(/*SetUsed=*/false)) {
WasmExport Export;
Export.FieldName = WS.getName();
Export.Index = Index;
-
if (WS.isFunction())
Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
else
Export.Kind = wasm::WASM_EXTERNAL_GLOBAL;
-
Exports.push_back(Export);
}
}
+ // Handle weak aliases
+ for (const MCSymbol &S : Asm.symbols()) {
+ if (!S.isVariable())
+ continue;
+ assert(S.isExternal());
+ assert(S.isDefined(/*SetUsed=*/false));
+
+ const auto &WS = static_cast<const MCSymbolWasm &>(S);
+
+ // Find the target symbol of this weak alias
+ const MCExpr *Expr = WS.getVariableValue();
+    auto *Inner = cast<MCSymbolRefExpr>(Expr);
+ const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol());
+ uint32_t Index = SymbolIndices.find(ResolvedSym)->second;
+ DEBUG(dbgs() << "Weak alias: '" << WS << "' -> '" << ResolvedSym << "' = " << Index << "\n");
+ SymbolIndices[&WS] = Index;
+
+ WasmExport Export;
+ Export.FieldName = WS.getName();
+ Export.Index = Index;
+ if (WS.isFunction())
+ Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
+ else
+ Export.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+ WeakSymbols.push_back(Export.FieldName);
+ Exports.push_back(Export);
+ }
+
// Add types for indirect function calls.
for (const WasmRelocationEntry &Fixup : CodeRelocations) {
if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB)
@@ -1198,7 +1272,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
writeNameSection(Functions, Imports, NumFuncImports);
writeCodeRelocSection();
writeDataRelocSection(DataSectionHeaderSize);
- writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal);
+ writeLinkingMetaDataSection(WeakSymbols, HasStackPointer, StackPointerGlobal);
// TODO: Translate the .comment section to the output.
// TODO: Translate debug sections to the output.
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 53dee3e8b9f36..fc5234950391b 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -735,7 +735,6 @@ void WinCOFFObjectWriter::recordRelocation(
COFFSection *Sec = SectionMap[MCSec];
const MCSymbolRefExpr *SymB = Target.getSymB();
- bool CrossSection = false;
if (SymB) {
const MCSymbol *B = &SymB->getSymbol();
@@ -747,28 +746,9 @@ void WinCOFFObjectWriter::recordRelocation(
return;
}
- if (!A.getFragment()) {
- Asm.getContext().reportError(
- Fixup.getLoc(),
- Twine("symbol '") + A.getName() +
- "' can not be undefined in a subtraction expression");
- return;
- }
-
- CrossSection = &A.getSection() != &B->getSection();
-
// Offset of the symbol in the section
int64_t OffsetOfB = Layout.getSymbolOffset(*B);
- // In the case where we have SymbA and SymB, we just need to store the delta
- // between the two symbols. Update FixedValue to account for the delta, and
- // skip recording the relocation.
- if (!CrossSection) {
- int64_t OffsetOfA = Layout.getSymbolOffset(A);
- FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant();
- return;
- }
-
// Offset of the relocation in the section
int64_t OffsetOfRelocation =
Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
@@ -784,7 +764,7 @@ void WinCOFFObjectWriter::recordRelocation(
Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
// Turn relocations for temporary symbols into section relocations.
- if (A.isTemporary() || CrossSection) {
+ if (A.isTemporary()) {
MCSection *TargetSection = &A.getSection();
assert(
SectionMap.find(TargetSection) != SectionMap.end() &&
@@ -802,7 +782,7 @@ void WinCOFFObjectWriter::recordRelocation(
Reloc.Data.VirtualAddress += Fixup.getOffset();
Reloc.Data.Type = TargetObjectWriter->getRelocType(
- Target, Fixup, CrossSection, Asm.getBackend());
+ Asm.getContext(), Target, Fixup, SymB, Asm.getBackend());
// FIXME: Can anyone explain what this does other than adjust for the size
// of the offset?
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 579c8dde366a0..9a760d86e7e22 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -650,6 +650,23 @@ std::error_code COFFObjectFile::initDebugDirectoryPtr() {
return std::error_code();
}
+std::error_code COFFObjectFile::initLoadConfigPtr() {
+  // Get the RVA of the load config table. Do nothing if it does not exist.
+ const data_directory *DataEntry;
+ if (getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataEntry))
+ return std::error_code();
+
+ // Do nothing if the RVA is NULL.
+ if (DataEntry->RelativeVirtualAddress == 0)
+ return std::error_code();
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
+ return EC;
+
+ LoadConfig = (const void *)IntPtr;
+ return std::error_code();
+}
+
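With LoadConfig populated, typed views over the load config table can be layered on top. A hedged consumer sketch; getLoadConfig32 and the GuardFlags field are assumed to match the COFF structure definitions in llvm/Object/COFF.h:

    // Obj is a COFFObjectFile for a 32-bit image; accessor name assumed.
    if (const coff_load_configuration32 *LC = Obj.getLoadConfig32()) {
      uint32_t Guard = LC->GuardFlags; // e.g. inspect Control Flow Guard bits
      (void)Guard;
    }
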
COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
: ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr),
COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr),
@@ -784,6 +801,9 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
if ((EC = initDebugDirectoryPtr()))
return;
+ if ((EC = initLoadConfigPtr()))
+ return;
+
EC = std::error_code();
}
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index a6cd5dda12d3e..7bca032a7be1d 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -46,15 +46,15 @@ namespace {
/// Stores the temporary state that is required to build an IR symbol table.
struct Builder {
SmallVector<char, 0> &Symtab;
- SmallVector<char, 0> &Strtab;
+ StringTableBuilder &StrtabBuilder;
+ StringSaver Saver;
- Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab)
- : Symtab(Symtab), Strtab(Strtab) {}
-
- StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
-
- BumpPtrAllocator Alloc;
- StringSaver Saver{Alloc};
+ // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
+ // The StringTableBuilder does not create a copy of any strings added to it,
+ // so this provides somewhere to store any strings that we create.
+ Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
+ BumpPtrAllocator &Alloc)
+ : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
DenseMap<const Comdat *, unsigned> ComdatMap;
Mangler Mang;
@@ -240,7 +240,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
return Err;
COFFLinkerOptsOS.flush();
- setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts);
+ setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts));
// We are about to fill in the header's range fields, so reserve space for it
// and copy it in afterwards.
@@ -251,19 +251,15 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
writeRange(Hdr.Uncommons, Uncommons);
*reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
-
- raw_svector_ostream OS(Strtab);
- StrtabBuilder.finalizeInOrder();
- StrtabBuilder.write(OS);
-
return Error::success();
}
} // end anonymous namespace
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
- SmallVector<char, 0> &Strtab) {
- return Builder(Symtab, Strtab).build(Mods);
+ StringTableBuilder &StrtabBuilder,
+ BumpPtrAllocator &Alloc) {
+ return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
}
// Upgrade a vector of bitcode modules created by an old version of LLVM by
@@ -285,9 +281,15 @@ static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) {
OwnedMods.push_back(std::move(*MOrErr));
}
- if (Error E = build(Mods, FC.Symtab, FC.Strtab))
+ StringTableBuilder StrtabBuilder(StringTableBuilder::RAW);
+ BumpPtrAllocator Alloc;
+ if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc))
return std::move(E);
+ StrtabBuilder.finalizeInOrder();
+ FC.Strtab.resize(StrtabBuilder.getSize());
+ StrtabBuilder.write((uint8_t *)FC.Strtab.data());
+
FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()},
{FC.Strtab.data(), FC.Strtab.size()}};
return std::move(FC);
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 7804bbe06f83e..2e4da9f15aa13 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -1951,13 +1951,29 @@ MachOObjectFile::section_rel_end(DataRefImpl Sec) const {
return relocation_iterator(RelocationRef(Ret, this));
}
+relocation_iterator MachOObjectFile::extrel_begin() const {
+ DataRefImpl Ret;
+ Ret.d.a = 0; // Would normally be a section index.
+ Ret.d.b = 0; // Index into the external relocations
+ return relocation_iterator(RelocationRef(Ret, this));
+}
+
+relocation_iterator MachOObjectFile::extrel_end() const {
+ MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand();
+ DataRefImpl Ret;
+ Ret.d.a = 0; // Would normally be a section index.
+ Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations
+ return relocation_iterator(RelocationRef(Ret, this));
+}
+
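A usage sketch for the new accessors, iterating the dysymtab's external relocations through the generic relocation API (Obj is assumed to be a MachOObjectFile):

    for (const object::RelocationRef &Reloc :
         llvm::make_range(Obj.extrel_begin(), Obj.extrel_end())) {
      uint64_t Offset = Reloc.getOffset(); // resolved via the dysymtab
      (void)Offset;
    }
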
void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
++Rel.d.b;
}
uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const {
- assert(getHeader().filetype == MachO::MH_OBJECT &&
- "Only implemented for MH_OBJECT");
+ assert((getHeader().filetype == MachO::MH_OBJECT ||
+ getHeader().filetype == MachO::MH_KEXT_BUNDLE) &&
+ "Only implemented for MH_OBJECT && MH_KEXT_BUNDLE");
MachO::any_relocation_info RE = getRelocation(Rel);
return getAnyRelocationAddress(RE);
}
@@ -4086,15 +4102,20 @@ MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const {
MachO::any_relocation_info
MachOObjectFile::getRelocation(DataRefImpl Rel) const {
- DataRefImpl Sec;
- Sec.d.a = Rel.d.a;
uint32_t Offset;
- if (is64Bit()) {
- MachO::section_64 Sect = getSection64(Sec);
- Offset = Sect.reloff;
+ if (getHeader().filetype == MachO::MH_OBJECT) {
+ DataRefImpl Sec;
+ Sec.d.a = Rel.d.a;
+ if (is64Bit()) {
+ MachO::section_64 Sect = getSection64(Sec);
+ Offset = Sect.reloff;
+ } else {
+ MachO::section Sect = getSection(Sec);
+ Offset = Sect.reloff;
+ }
} else {
- MachO::section Sect = getSection(Sec);
- Offset = Sect.reloff;
+ MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand();
+ Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations
}
auto P = reinterpret_cast<const MachO::any_relocation_info *>(
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index 2304098c1dc9f..d15860674aeb9 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -28,6 +28,8 @@
#include <cstring>
#include <system_error>
+#define DEBUG_TYPE "wasm-object"
+
using namespace llvm;
using namespace object;
@@ -256,6 +258,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
while (Ptr < End) {
uint8_t Type = readVarint7(Ptr);
uint32_t Size = readVaruint32(Ptr);
+ const uint8_t *SubSectionEnd = Ptr + Size;
switch (Type) {
case wasm::WASM_NAMES_FUNCTION: {
uint32_t Count = readVaruint32(Ptr);
@@ -275,6 +278,9 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
Ptr += Size;
break;
}
+ if (Ptr != SubSectionEnd)
+ return make_error<GenericBinaryError>("Name sub-section ended prematurely",
+ object_error::parse_failed);
}
if (Ptr != End)
@@ -283,6 +289,50 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
return Error::success();
}
+Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr,
+ const uint8_t *End) {
+ while (Ptr < End) {
+ uint8_t Type = readVarint7(Ptr);
+ uint32_t Size = readVaruint32(Ptr);
+ const uint8_t *SubSectionEnd = Ptr + Size;
+ switch (Type) {
+ case wasm::WASM_SYMBOL_INFO: {
+ uint32_t Count = readVaruint32(Ptr);
+ while (Count--) {
+ StringRef Symbol = readString(Ptr);
+ DEBUG(dbgs() << "reading syminfo: " << Symbol << "\n");
+ uint32_t Flags = readVaruint32(Ptr);
+ auto iter = SymbolMap.find(Symbol);
+ if (iter == SymbolMap.end()) {
+ return make_error<GenericBinaryError>(
+ "Invalid symbol name in linking section",
+ object_error::parse_failed);
+ }
+ uint32_t SymIndex = iter->second;
+ assert(SymIndex < Symbols.size());
+ Symbols[SymIndex].Flags = Flags;
+ DEBUG(dbgs() << "Set symbol flags index:"
+ << SymIndex << " name:"
+                     << Symbols[SymIndex].Name << " expected:"
+ << Symbol << " flags: " << Flags << "\n");
+ }
+ break;
+ }
+ case wasm::WASM_STACK_POINTER:
+ default:
+ Ptr += Size;
+ break;
+ }
+ if (Ptr != SubSectionEnd)
+ return make_error<GenericBinaryError>(
+ "Linking sub-section ended prematurely", object_error::parse_failed);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Linking section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) {
for (WasmSection& Section : Sections) {
if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name)
@@ -325,6 +375,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr,
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
break;
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
@@ -332,7 +383,8 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr,
Reloc.Addend = readVarint32(Ptr);
break;
default:
- return make_error<GenericBinaryError>("Bad relocation type",
+ return make_error<GenericBinaryError>("Bad relocation type: " +
+ Twine(Reloc.Type),
object_error::parse_failed);
}
Section->Relocations.push_back(Reloc);
@@ -349,6 +401,9 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec,
if (Sec.Name == "name") {
if (Error Err = parseNameSection(Ptr, End))
return Err;
+ } else if (Sec.Name == "linking") {
+ if (Error Err = parseLinkingSection(Ptr, End))
+ return Err;
} else if (Sec.Name.startswith("reloc.")) {
if (Error Err = parseRelocSection(Sec.Name, Ptr, End))
return Err;
@@ -400,14 +455,20 @@ Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End)
switch (Im.Kind) {
case wasm::WASM_EXTERNAL_FUNCTION:
Im.SigIndex = readVaruint32(Ptr);
+ SymbolMap.try_emplace(Im.Field, Symbols.size());
Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT,
Sections.size(), i);
+ DEBUG(dbgs() << "Adding import: " << Symbols.back()
+ << " sym index:" << Symbols.size() << "\n");
break;
case wasm::WASM_EXTERNAL_GLOBAL:
Im.Global.Type = readVarint7(Ptr);
Im.Global.Mutable = readVaruint1(Ptr);
+ SymbolMap.try_emplace(Im.Field, Symbols.size());
Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT,
Sections.size(), i);
+ DEBUG(dbgs() << "Adding import: " << Symbols.back()
+ << " sym index:" << Symbols.size() << "\n");
break;
case wasm::WASM_EXTERNAL_MEMORY:
Im.Memory = readLimits(Ptr);
@@ -496,15 +557,20 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End)
Ex.Name = readString(Ptr);
Ex.Kind = readUint8(Ptr);
Ex.Index = readVaruint32(Ptr);
- Exports.push_back(Ex);
switch (Ex.Kind) {
case wasm::WASM_EXTERNAL_FUNCTION:
+ SymbolMap.try_emplace(Ex.Name, Symbols.size());
Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT,
Sections.size(), i);
+ DEBUG(dbgs() << "Adding export: " << Symbols.back()
+ << " sym index:" << Symbols.size() << "\n");
break;
case wasm::WASM_EXTERNAL_GLOBAL:
+ SymbolMap.try_emplace(Ex.Name, Symbols.size());
Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT,
Sections.size(), i);
+ DEBUG(dbgs() << "Adding export: " << Symbols.back()
+ << " sym index:" << Symbols.size() << "\n");
break;
case wasm::WASM_EXTERNAL_MEMORY:
case wasm::WASM_EXTERNAL_TABLE:
@@ -513,6 +579,7 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End)
return make_error<GenericBinaryError>(
"Unexpected export kind", object_error::parse_failed);
}
+ Exports.push_back(Ex);
}
if (Ptr != End)
return make_error<GenericBinaryError>("Export section ended prematurely",
@@ -620,6 +687,10 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
uint32_t Result = SymbolRef::SF_None;
const WasmSymbol &Sym = getWasmSymbol(Symb);
+ DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n");
+ if (Sym.Flags & wasm::WASM_SYMBOL_FLAG_WEAK)
+ Result |= SymbolRef::SF_Weak;
+
switch (Sym.Type) {
case WasmSymbol::SymbolType::FUNCTION_IMPORT:
Result |= SymbolRef::SF_Undefined | SymbolRef::SF_Executable;
@@ -629,6 +700,7 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
break;
case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME:
Result |= SymbolRef::SF_Executable;
+ Result |= SymbolRef::SF_FormatSpecific;
break;
case WasmSymbol::SymbolType::GLOBAL_IMPORT:
Result |= SymbolRef::SF_Undefined;
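
Downstream, the weak flag surfaces through the generic symbol API; a caller-side sketch, assuming Sym is an object::SymbolRef over a parsed wasm object:

    uint32_t Flags = Sym.getFlags();
    if (Flags & object::SymbolRef::SF_Weak) {
      // Treat as weak: a strong definition elsewhere may override it.
    }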
@@ -662,8 +734,7 @@ const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const {
}
Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
- const WasmSymbol &Sym = getWasmSymbol(Symb);
- return Sym.Name;
+ return getWasmSymbol(Symb).Name;
}
Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
@@ -671,8 +742,18 @@ Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
}
uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
- const WasmSymbol &Sym = getWasmSymbol(Symb);
- return Sym.ElementIndex;
+ const WasmSymbol& Sym = getWasmSymbol(Symb);
+ switch (Sym.Type) {
+ case WasmSymbol::SymbolType::FUNCTION_IMPORT:
+ case WasmSymbol::SymbolType::GLOBAL_IMPORT:
+ return 0;
+ case WasmSymbol::SymbolType::FUNCTION_EXPORT:
+ case WasmSymbol::SymbolType::GLOBAL_EXPORT:
+ return Exports[Sym.ElementIndex].Index;
+ case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME:
+ return Sym.ElementIndex;
+ }
+ llvm_unreachable("invalid symbol type");
}
uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const {
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index 3f6080d48f9d1..ff9b9ca35eb5b 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/WindowsResource.h"
-#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/MathExtras.h"
@@ -21,6 +20,9 @@
#include <sstream>
#include <system_error>
+using namespace llvm;
+using namespace object;
+
namespace llvm {
namespace object {
@@ -34,23 +36,19 @@ const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t);
// 8-byte because it makes everyone happy.
const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t);
-static const size_t ResourceMagicSize = 16;
-
-static const size_t NullEntrySize = 16;
-
uint32_t WindowsResourceParser::TreeNode::StringCount = 0;
uint32_t WindowsResourceParser::TreeNode::DataCount = 0;
WindowsResource::WindowsResource(MemoryBufferRef Source)
: Binary(Binary::ID_WinRes, Source) {
- size_t LeadingSize = ResourceMagicSize + NullEntrySize;
+ size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE;
BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize),
support::little);
}
Expected<std::unique_ptr<WindowsResource>>
WindowsResource::createWindowsResource(MemoryBufferRef Source) {
- if (Source.getBufferSize() < ResourceMagicSize + NullEntrySize)
+ if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE)
return make_error<GenericBinaryError>(
"File too small to be a resource file",
object_error::invalid_file_type);
@@ -103,12 +101,10 @@ static Error readStringOrId(BinaryStreamReader &Reader, uint16_t &ID,
}
Error ResourceEntryRef::loadNext() {
- uint32_t DataSize;
- RETURN_IF_ERROR(Reader.readInteger(DataSize));
- uint32_t HeaderSize;
- RETURN_IF_ERROR(Reader.readInteger(HeaderSize));
+ const WinResHeaderPrefix *Prefix;
+ RETURN_IF_ERROR(Reader.readObject(Prefix));
- if (HeaderSize < MIN_HEADER_SIZE)
+ if (Prefix->HeaderSize < MIN_HEADER_SIZE)
return make_error<GenericBinaryError>("Header size is too small.",
object_error::parse_failed);
@@ -116,13 +112,13 @@ Error ResourceEntryRef::loadNext() {
RETURN_IF_ERROR(readStringOrId(Reader, NameID, Name, IsStringName));
- RETURN_IF_ERROR(Reader.padToAlignment(sizeof(uint32_t)));
+ RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_HEADER_ALIGNMENT));
RETURN_IF_ERROR(Reader.readObject(Suffix));
- RETURN_IF_ERROR(Reader.readArray(Data, DataSize));
+ RETURN_IF_ERROR(Reader.readArray(Data, Prefix->DataSize));
- RETURN_IF_ERROR(Reader.padToAlignment(sizeof(uint32_t)));
+ RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_DATA_ALIGNMENT));
return Error::success();
}
@@ -246,14 +242,14 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef,
std::string NameString;
ArrayRef<UTF16> CorrectedName;
std::vector<UTF16> EndianCorrectedName;
- if (llvm::sys::IsBigEndianHost) {
+ if (sys::IsBigEndianHost) {
EndianCorrectedName.resize(NameRef.size() + 1);
std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1);
EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
CorrectedName = makeArrayRef(EndianCorrectedName);
} else
CorrectedName = NameRef;
- llvm::convertUTF16ToUTF8String(CorrectedName, NameString);
+ convertUTF16ToUTF8String(CorrectedName, NameString);
auto Child = StringChildren.find(NameString);
if (Child == StringChildren.end()) {
@@ -282,17 +278,17 @@ void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer,
// the directory strings or the relocations of the .rsrc section.
uint32_t WindowsResourceParser::TreeNode::getTreeSize() const {
uint32_t Size = (IDChildren.size() + StringChildren.size()) *
- sizeof(llvm::object::coff_resource_dir_entry);
+ sizeof(coff_resource_dir_entry);
// Reached a node pointing to a data entry.
if (IsDataNode) {
- Size += sizeof(llvm::object::coff_resource_data_entry);
+ Size += sizeof(coff_resource_data_entry);
return Size;
}
// If the node does not point to data, it must have a directory table pointing
// to other nodes.
- Size += sizeof(llvm::object::coff_resource_dir_table);
+ Size += sizeof(coff_resource_dir_table);
for (auto const &Child : StringChildren) {
Size += Child.second->getTreeSize();
@@ -305,9 +301,9 @@ uint32_t WindowsResourceParser::TreeNode::getTreeSize() const {
class WindowsResourceCOFFWriter {
public:
- WindowsResourceCOFFWriter(StringRef OutputFile, Machine MachineType,
+ WindowsResourceCOFFWriter(COFF::MachineTypes MachineType,
const WindowsResourceParser &Parser, Error &E);
- Error write();
+ std::unique_ptr<MemoryBuffer> write();
private:
void performFileLayout();
@@ -323,10 +319,10 @@ private:
void writeDirectoryTree();
void writeDirectoryStringTable();
void writeFirstSectionRelocations();
- std::unique_ptr<FileOutputBuffer> Buffer;
- uint8_t *BufferStart;
+ std::unique_ptr<MemoryBuffer> OutputBuffer;
+ char *BufferStart;
uint64_t CurrentOffset = 0;
- Machine MachineType;
+ COFF::MachineTypes MachineType;
const WindowsResourceParser::TreeNode &Resources;
const ArrayRef<std::vector<uint8_t>> Data;
uint64_t FileSize;
@@ -343,28 +339,21 @@ private:
};
WindowsResourceCOFFWriter::WindowsResourceCOFFWriter(
- StringRef OutputFile, Machine MachineType,
- const WindowsResourceParser &Parser, Error &E)
+ COFF::MachineTypes MachineType, const WindowsResourceParser &Parser,
+ Error &E)
: MachineType(MachineType), Resources(Parser.getTree()),
Data(Parser.getData()), StringTable(Parser.getStringTable()) {
performFileLayout();
- ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
- FileOutputBuffer::create(OutputFile, FileSize);
- if (!BufferOrErr) {
- E = errorCodeToError(BufferOrErr.getError());
- return;
- }
-
- Buffer = std::move(*BufferOrErr);
+ OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize);
}
void WindowsResourceCOFFWriter::performFileLayout() {
// Add size of COFF header.
- FileSize = llvm::COFF::Header16Size;
+ FileSize = COFF::Header16Size;
// one .rsrc section header for directory tree, another for resource data.
- FileSize += 2 * llvm::COFF::SectionSize;
+ FileSize += 2 * COFF::SectionSize;
performSectionOneLayout();
performSectionTwoLayout();
@@ -372,9 +361,9 @@ void WindowsResourceCOFFWriter::performFileLayout() {
// We have reached the address of the symbol table.
SymbolTableOffset = FileSize;
- FileSize += llvm::COFF::Symbol16Size; // size of the @feat.00 symbol.
- FileSize += 4 * llvm::COFF::Symbol16Size; // symbol + aux for each section.
- FileSize += Data.size() * llvm::COFF::Symbol16Size; // 1 symbol per resource.
+ FileSize += COFF::Symbol16Size; // size of the @feat.00 symbol.
+ FileSize += 4 * COFF::Symbol16Size; // symbol + aux for each section.
+ FileSize += Data.size() * COFF::Symbol16Size; // 1 symbol per resource.
FileSize += 4; // four null bytes for the string table.
}
@@ -395,8 +384,8 @@ void WindowsResourceCOFFWriter::performSectionOneLayout() {
// account for the relocations of section one.
SectionOneRelocations = FileSize + SectionOneSize;
FileSize += SectionOneSize;
- FileSize += Data.size() *
- llvm::COFF::RelocationSize; // one relocation for each resource.
+ FileSize +=
+ Data.size() * COFF::RelocationSize; // one relocation for each resource.
FileSize = alignTo(FileSize, SECTION_ALIGNMENT);
}
@@ -407,7 +396,7 @@ void WindowsResourceCOFFWriter::performSectionTwoLayout() {
SectionTwoSize = 0;
for (auto const &Entry : Data) {
DataOffsets.push_back(SectionTwoSize);
- SectionTwoSize += llvm::alignTo(Entry.size(), sizeof(uint64_t));
+ SectionTwoSize += alignTo(Entry.size(), sizeof(uint64_t));
}
FileSize += SectionTwoSize;
FileSize = alignTo(FileSize, SECTION_ALIGNMENT);
@@ -420,8 +409,8 @@ static std::time_t getTime() {
return Now;
}
-Error WindowsResourceCOFFWriter::write() {
- BufferStart = Buffer->getBufferStart();
+std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() {
+ BufferStart = const_cast<char *>(OutputBuffer->getBufferStart());
writeCOFFHeader();
writeFirstSectionHeader();
@@ -431,29 +420,24 @@ Error WindowsResourceCOFFWriter::write() {
writeSymbolTable();
writeStringTable();
- if (auto EC = Buffer->commit()) {
- return errorCodeToError(EC);
- }
-
- return Error::success();
+ return std::move(OutputBuffer);
}
void WindowsResourceCOFFWriter::writeCOFFHeader() {
// Write the COFF header.
- auto *Header =
- reinterpret_cast<llvm::object::coff_file_header *>(BufferStart);
+ auto *Header = reinterpret_cast<coff_file_header *>(BufferStart);
switch (MachineType) {
- case Machine::ARM:
- Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT;
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ Header->Machine = COFF::IMAGE_FILE_MACHINE_ARMNT;
break;
- case Machine::X64:
- Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_AMD64;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ Header->Machine = COFF::IMAGE_FILE_MACHINE_AMD64;
break;
- case Machine::X86:
- Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_I386;
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ Header->Machine = COFF::IMAGE_FILE_MACHINE_I386;
break;
default:
- Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
+ Header->Machine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
}
Header->NumberOfSections = 2;
Header->TimeDateStamp = getTime();
@@ -461,15 +445,15 @@ void WindowsResourceCOFFWriter::writeCOFFHeader() {
// One symbol for every resource plus 2 for each section and @feat.00
Header->NumberOfSymbols = Data.size() + 5;
Header->SizeOfOptionalHeader = 0;
- Header->Characteristics = llvm::COFF::IMAGE_FILE_32BIT_MACHINE;
+ Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE;
}
void WindowsResourceCOFFWriter::writeFirstSectionHeader() {
// Write the first section header.
- CurrentOffset += sizeof(llvm::object::coff_file_header);
- auto *SectionOneHeader = reinterpret_cast<llvm::object::coff_section *>(
- BufferStart + CurrentOffset);
- strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)llvm::COFF::NameSize);
+ CurrentOffset += sizeof(coff_file_header);
+ auto *SectionOneHeader =
+ reinterpret_cast<coff_section *>(BufferStart + CurrentOffset);
+ strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize);
SectionOneHeader->VirtualSize = 0;
SectionOneHeader->VirtualAddress = 0;
SectionOneHeader->SizeOfRawData = SectionOneSize;
@@ -478,19 +462,16 @@ void WindowsResourceCOFFWriter::writeFirstSectionHeader() {
SectionOneHeader->PointerToLinenumbers = 0;
SectionOneHeader->NumberOfRelocations = Data.size();
SectionOneHeader->NumberOfLinenumbers = 0;
- SectionOneHeader->Characteristics = llvm::COFF::IMAGE_SCN_ALIGN_1BYTES;
- SectionOneHeader->Characteristics +=
- llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
- SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE;
- SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ;
+ SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
+ SectionOneHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ;
}
void WindowsResourceCOFFWriter::writeSecondSectionHeader() {
// Write the second section header.
- CurrentOffset += sizeof(llvm::object::coff_section);
- auto *SectionTwoHeader = reinterpret_cast<llvm::object::coff_section *>(
- BufferStart + CurrentOffset);
- strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)llvm::COFF::NameSize);
+ CurrentOffset += sizeof(coff_section);
+ auto *SectionTwoHeader =
+ reinterpret_cast<coff_section *>(BufferStart + CurrentOffset);
+ strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize);
SectionTwoHeader->VirtualSize = 0;
SectionTwoHeader->VirtualAddress = 0;
SectionTwoHeader->SizeOfRawData = SectionTwoSize;
@@ -499,14 +480,13 @@ void WindowsResourceCOFFWriter::writeSecondSectionHeader() {
SectionTwoHeader->PointerToLinenumbers = 0;
SectionTwoHeader->NumberOfRelocations = 0;
SectionTwoHeader->NumberOfLinenumbers = 0;
- SectionTwoHeader->Characteristics =
- llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
- SectionTwoHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ;
+ SectionTwoHeader->Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
+ SectionTwoHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ;
}
void WindowsResourceCOFFWriter::writeFirstSection() {
// Write section one.
- CurrentOffset += sizeof(llvm::object::coff_section);
+ CurrentOffset += sizeof(coff_section);
writeDirectoryTree();
writeDirectoryStringTable();
@@ -529,70 +509,65 @@ void WindowsResourceCOFFWriter::writeSecondSection() {
void WindowsResourceCOFFWriter::writeSymbolTable() {
// Now write the symbol table.
// First, the feat symbol.
- auto *Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
- CurrentOffset);
- strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)llvm::COFF::NameSize);
+ auto *Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
+ strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize);
Symbol->Value = 0x11;
Symbol->SectionNumber = 0xffff;
- Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
- Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+ Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
+ Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 0;
- CurrentOffset += sizeof(llvm::object::coff_symbol16);
+ CurrentOffset += sizeof(coff_symbol16);
// Now write the .rsrc1 symbol + aux.
- Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
- CurrentOffset);
- strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)llvm::COFF::NameSize);
+ Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
+ strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize);
Symbol->Value = 0;
Symbol->SectionNumber = 1;
- Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
- Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+ Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
+ Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 1;
- CurrentOffset += sizeof(llvm::object::coff_symbol16);
- auto *Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>(
- BufferStart + CurrentOffset);
+ CurrentOffset += sizeof(coff_symbol16);
+ auto *Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart +
+ CurrentOffset);
Aux->Length = SectionOneSize;
Aux->NumberOfRelocations = Data.size();
Aux->NumberOfLinenumbers = 0;
Aux->CheckSum = 0;
Aux->NumberLowPart = 0;
Aux->Selection = 0;
- CurrentOffset += sizeof(llvm::object::coff_aux_section_definition);
+ CurrentOffset += sizeof(coff_aux_section_definition);
// Now write the .rsrc2 symbol + aux.
- Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
- CurrentOffset);
- strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)llvm::COFF::NameSize);
+ Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
+ strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize);
Symbol->Value = 0;
Symbol->SectionNumber = 2;
- Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
- Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+ Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
+ Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 1;
- CurrentOffset += sizeof(llvm::object::coff_symbol16);
- Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>(
- BufferStart + CurrentOffset);
+ CurrentOffset += sizeof(coff_symbol16);
+ Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart +
+ CurrentOffset);
Aux->Length = SectionTwoSize;
Aux->NumberOfRelocations = 0;
Aux->NumberOfLinenumbers = 0;
Aux->CheckSum = 0;
Aux->NumberLowPart = 0;
Aux->Selection = 0;
- CurrentOffset += sizeof(llvm::object::coff_aux_section_definition);
+ CurrentOffset += sizeof(coff_aux_section_definition);
// Now write a symbol for each relocation.
for (unsigned i = 0; i < Data.size(); i++) {
char RelocationName[9];
sprintf(RelocationName, "$R%06X", DataOffsets[i]);
- Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(BufferStart +
- CurrentOffset);
- strncpy(Symbol->Name.ShortName, RelocationName,
- (size_t)llvm::COFF::NameSize);
+ Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset);
+ strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize);
Symbol->Value = DataOffsets[i];
Symbol->SectionNumber = 1;
- Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
- Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+ Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL;
+ Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
Symbol->NumberOfAuxSymbols = 0;
- CurrentOffset += sizeof(llvm::object::coff_symbol16);
+ CurrentOffset += sizeof(coff_symbol16);
}
}
@@ -607,18 +582,18 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
// COFF objects.
std::queue<const WindowsResourceParser::TreeNode *> Queue;
Queue.push(&Resources);
- uint32_t NextLevelOffset = sizeof(llvm::object::coff_resource_dir_table) +
- (Resources.getStringChildren().size() +
- Resources.getIDChildren().size()) *
- sizeof(llvm::object::coff_resource_dir_entry);
+ uint32_t NextLevelOffset =
+ sizeof(coff_resource_dir_table) + (Resources.getStringChildren().size() +
+ Resources.getIDChildren().size()) *
+ sizeof(coff_resource_dir_entry);
std::vector<const WindowsResourceParser::TreeNode *> DataEntriesTreeOrder;
uint32_t CurrentRelativeOffset = 0;
while (!Queue.empty()) {
auto CurrentNode = Queue.front();
Queue.pop();
- auto *Table = reinterpret_cast<llvm::object::coff_resource_dir_table *>(
- BufferStart + CurrentOffset);
+ auto *Table = reinterpret_cast<coff_resource_dir_table *>(BufferStart +
+ CurrentOffset);
Table->Characteristics = CurrentNode->getCharacteristics();
Table->TimeDateStamp = 0;
Table->MajorVersion = CurrentNode->getMajorVersion();
@@ -627,63 +602,63 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() {
auto &StringChildren = CurrentNode->getStringChildren();
Table->NumberOfNameEntries = StringChildren.size();
Table->NumberOfIDEntries = IDChildren.size();
- CurrentOffset += sizeof(llvm::object::coff_resource_dir_table);
- CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_table);
+ CurrentOffset += sizeof(coff_resource_dir_table);
+ CurrentRelativeOffset += sizeof(coff_resource_dir_table);
// Write the directory entries immediately following each directory table.
for (auto const &Child : StringChildren) {
- auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>(
- BufferStart + CurrentOffset);
+ auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart +
+ CurrentOffset);
Entry->Identifier.NameOffset =
StringTableOffsets[Child.second->getStringIndex()];
if (Child.second->checkIsDataNode()) {
Entry->Offset.DataEntryOffset = NextLevelOffset;
- NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry);
+ NextLevelOffset += sizeof(coff_resource_data_entry);
DataEntriesTreeOrder.push_back(Child.second.get());
} else {
Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31);
- NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) +
+ NextLevelOffset += sizeof(coff_resource_dir_table) +
(Child.second->getStringChildren().size() +
Child.second->getIDChildren().size()) *
- sizeof(llvm::object::coff_resource_dir_entry);
+ sizeof(coff_resource_dir_entry);
Queue.push(Child.second.get());
}
- CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry);
- CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry);
+ CurrentOffset += sizeof(coff_resource_dir_entry);
+ CurrentRelativeOffset += sizeof(coff_resource_dir_entry);
}
for (auto const &Child : IDChildren) {
- auto *Entry = reinterpret_cast<llvm::object::coff_resource_dir_entry *>(
- BufferStart + CurrentOffset);
+ auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart +
+ CurrentOffset);
Entry->Identifier.ID = Child.first;
if (Child.second->checkIsDataNode()) {
Entry->Offset.DataEntryOffset = NextLevelOffset;
- NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry);
+ NextLevelOffset += sizeof(coff_resource_data_entry);
DataEntriesTreeOrder.push_back(Child.second.get());
} else {
Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31);
- NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) +
+ NextLevelOffset += sizeof(coff_resource_dir_table) +
(Child.second->getStringChildren().size() +
Child.second->getIDChildren().size()) *
- sizeof(llvm::object::coff_resource_dir_entry);
+ sizeof(coff_resource_dir_entry);
Queue.push(Child.second.get());
}
- CurrentOffset += sizeof(llvm::object::coff_resource_dir_entry);
- CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry);
+ CurrentOffset += sizeof(coff_resource_dir_entry);
+ CurrentRelativeOffset += sizeof(coff_resource_dir_entry);
}
}
RelocationAddresses.resize(Data.size());
// Now write all the resource data entries.
for (auto DataNodes : DataEntriesTreeOrder) {
- auto *Entry = reinterpret_cast<llvm::object::coff_resource_data_entry *>(
- BufferStart + CurrentOffset);
+ auto *Entry = reinterpret_cast<coff_resource_data_entry *>(BufferStart +
+ CurrentOffset);
RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset;
Entry->DataRVA = 0; // Set to zero because it is a relocation.
Entry->DataSize = Data[DataNodes->getDataIndex()].size();
Entry->Codepage = 0;
Entry->Reserved = 0;
- CurrentOffset += sizeof(llvm::object::coff_resource_data_entry);
- CurrentRelativeOffset += sizeof(llvm::object::coff_resource_data_entry);
+ CurrentOffset += sizeof(coff_resource_data_entry);
+ CurrentRelativeOffset += sizeof(coff_resource_data_entry);
}
}
@@ -710,33 +685,34 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() {
// .rsrc section.
uint32_t NextSymbolIndex = 5;
for (unsigned i = 0; i < Data.size(); i++) {
- auto *Reloc = reinterpret_cast<llvm::object::coff_relocation *>(
- BufferStart + CurrentOffset);
+ auto *Reloc =
+ reinterpret_cast<coff_relocation *>(BufferStart + CurrentOffset);
Reloc->VirtualAddress = RelocationAddresses[i];
Reloc->SymbolTableIndex = NextSymbolIndex++;
switch (MachineType) {
- case Machine::ARM:
- Reloc->Type = llvm::COFF::IMAGE_REL_ARM_ADDR32NB;
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB;
break;
- case Machine::X64:
- Reloc->Type = llvm::COFF::IMAGE_REL_AMD64_ADDR32NB;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB;
break;
- case Machine::X86:
- Reloc->Type = llvm::COFF::IMAGE_REL_I386_DIR32NB;
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB;
break;
default:
Reloc->Type = 0;
}
- CurrentOffset += sizeof(llvm::object::coff_relocation);
+ CurrentOffset += sizeof(coff_relocation);
}
}
-Error writeWindowsResourceCOFF(StringRef OutputFile, Machine MachineType,
- const WindowsResourceParser &Parser) {
+Expected<std::unique_ptr<MemoryBuffer>>
+writeWindowsResourceCOFF(COFF::MachineTypes MachineType,
+ const WindowsResourceParser &Parser) {
Error E = Error::success();
- WindowsResourceCOFFWriter Writer(OutputFile, MachineType, Parser, E);
+ WindowsResourceCOFFWriter Writer(MachineType, Parser, E);
if (E)
- return E;
+ return std::move(E);
return Writer.write();
}
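
A caller sketch for the revised API: the writer now hands back an in-memory buffer, and persisting it is the caller's job (the raw_fd_ostream plumbing below is illustrative):

    auto BufOrErr = object::writeWindowsResourceCOFF(MachineType, Parser);
    if (!BufOrErr)
      return BufOrErr.takeError();
    std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
    std::error_code EC;
    raw_fd_ostream OS(OutputFile, EC, sys::fs::F_None);
    if (EC)
      return errorCodeToError(EC);
    OS << Buf->getBuffer();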
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index ba3a2abe20978..83f3d55b8e556 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -40,6 +40,7 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(FrameCookieKind)
LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym2Flags)
LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym3Flags)
LLVM_YAML_DECLARE_BITSET_TRAITS(ExportFlags)
+LLVM_YAML_DECLARE_BITSET_TRAITS(PublicSymFlags)
LLVM_YAML_DECLARE_BITSET_TRAITS(LocalSymFlags)
LLVM_YAML_DECLARE_BITSET_TRAITS(ProcSymFlags)
LLVM_YAML_DECLARE_BITSET_TRAITS(FrameProcedureOptions)
@@ -93,6 +94,14 @@ void ScalarBitSetTraits<ExportFlags>::bitset(IO &io, ExportFlags &Flags) {
}
}
+void ScalarBitSetTraits<PublicSymFlags>::bitset(IO &io, PublicSymFlags &Flags) {
+ auto FlagNames = getProcSymFlagNames();
+ for (const auto &E : FlagNames) {
+ io.bitSetCase(Flags, E.Name.str().c_str(),
+ static_cast<PublicSymFlags>(E.Value));
+ }
+}
+
void ScalarBitSetTraits<LocalSymFlags>::bitset(IO &io, LocalSymFlags &Flags) {
auto FlagNames = getLocalFlagNames();
for (const auto &E : FlagNames) {
@@ -277,16 +286,15 @@ template <> void SymbolRecordImpl<ExportSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<ProcSym>::map(IO &IO) {
- // TODO: Print the linkage name
-
- IO.mapRequired("PtrParent", Symbol.Parent);
- IO.mapRequired("PtrEnd", Symbol.End);
- IO.mapRequired("PtrNext", Symbol.Next);
+ IO.mapOptional("PtrParent", Symbol.Parent, 0U);
+ IO.mapOptional("PtrEnd", Symbol.End, 0U);
+ IO.mapOptional("PtrNext", Symbol.Next, 0U);
IO.mapRequired("CodeSize", Symbol.CodeSize);
IO.mapRequired("DbgStart", Symbol.DbgStart);
IO.mapRequired("DbgEnd", Symbol.DbgEnd);
IO.mapRequired("FunctionType", Symbol.FunctionType);
- IO.mapRequired("Segment", Symbol.Segment);
+ IO.mapOptional("Offset", Symbol.CodeOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("Flags", Symbol.Flags);
IO.mapRequired("DisplayName", Symbol.Name);
}
@@ -298,9 +306,9 @@ template <> void SymbolRecordImpl<RegisterSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<PublicSym32>::map(IO &IO) {
- IO.mapRequired("Type", Symbol.Index);
- IO.mapRequired("Seg", Symbol.Segment);
- IO.mapRequired("Off", Symbol.Offset);
+ IO.mapRequired("Flags", Symbol.Flags);
+ IO.mapOptional("Offset", Symbol.Offset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("Name", Symbol.Name);
}
@@ -316,8 +324,8 @@ template <> void SymbolRecordImpl<EnvBlockSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<InlineSiteSym>::map(IO &IO) {
- IO.mapRequired("PtrParent", Symbol.Parent);
- IO.mapRequired("PtrEnd", Symbol.End);
+ IO.mapOptional("PtrParent", Symbol.Parent, 0U);
+ IO.mapOptional("PtrEnd", Symbol.End, 0U);
IO.mapRequired("Inlinee", Symbol.Inlinee);
// TODO: The binary annotations
}
@@ -359,17 +367,17 @@ template <> void SymbolRecordImpl<DefRangeRegisterRelSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<BlockSym>::map(IO &IO) {
- // TODO: Print the linkage name
- IO.mapRequired("PtrParent", Symbol.Parent);
- IO.mapRequired("PtrEnd", Symbol.End);
+ IO.mapOptional("PtrParent", Symbol.Parent, 0U);
+ IO.mapOptional("PtrEnd", Symbol.End, 0U);
IO.mapRequired("CodeSize", Symbol.CodeSize);
- IO.mapRequired("Segment", Symbol.Segment);
+ IO.mapOptional("Offset", Symbol.CodeOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("BlockName", Symbol.Name);
}
template <> void SymbolRecordImpl<LabelSym>::map(IO &IO) {
- // TODO: Print the linkage name
- IO.mapRequired("Segment", Symbol.Segment);
+ IO.mapOptional("Offset", Symbol.CodeOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("Flags", Symbol.Flags);
IO.mapRequired("Flags", Symbol.Flags);
IO.mapRequired("DisplayName", Symbol.Name);
@@ -419,8 +427,8 @@ template <> void SymbolRecordImpl<FrameProcSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<CallSiteInfoSym>::map(IO &IO) {
- // TODO: Map Linkage Name
- IO.mapRequired("Segment", Symbol.Segment);
+ IO.mapOptional("Offset", Symbol.CodeOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("Type", Symbol.Type);
}
@@ -432,14 +440,13 @@ template <> void SymbolRecordImpl<FileStaticSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<HeapAllocationSiteSym>::map(IO &IO) {
- // TODO: Map Linkage Name
- IO.mapRequired("Segment", Symbol.Segment);
+ IO.mapOptional("Offset", Symbol.CodeOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("CallInstructionSize", Symbol.CallInstructionSize);
IO.mapRequired("Type", Symbol.Type);
}
template <> void SymbolRecordImpl<FrameCookieSym>::map(IO &IO) {
- // TODO: Map Linkage Name
IO.mapRequired("Register", Symbol.Register);
IO.mapRequired("CookieKind", Symbol.CookieKind);
IO.mapRequired("Flags", Symbol.Flags);
@@ -478,14 +485,16 @@ template <> void SymbolRecordImpl<ConstantSym>::map(IO &IO) {
}
template <> void SymbolRecordImpl<DataSym>::map(IO &IO) {
- // TODO: Map linkage name
IO.mapRequired("Type", Symbol.Type);
+ IO.mapOptional("Offset", Symbol.DataOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("DisplayName", Symbol.Name);
}
template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) {
- // TODO: Map linkage name
IO.mapRequired("Type", Symbol.Type);
+ IO.mapOptional("Offset", Symbol.DataOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
IO.mapRequired("DisplayName", Symbol.Name);
}
}
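
Switching these fields from mapRequired to mapOptional with an explicit
default means the YAML input may omit them (they are defaulted) and the YAML
output suppresses them when they equal the default. A sketch of the pattern
with a made-up record type (assumes llvm/Support/YAMLTraits.h):

    struct ExampleSym { uint32_t CodeOffset = 0; uint16_t Segment = 0; };
    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<ExampleSym> {
      static void mapping(IO &IO, ExampleSym &Sym) {
        // Omitted on output when equal to the default; defaulted on input.
        IO.mapOptional("Offset", Sym.CodeOffset, 0U);
        IO.mapOptional("Segment", Sym.Segment, uint16_t(0));
      }
    };
    } // end namespace yaml
    } // end namespace llvm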
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index a03b9cd50faa2..2d1cb4b1b27b9 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -138,7 +138,7 @@ StringRef ScalarTraits<TypeIndex>::input(StringRef Scalar, void *Ctx,
void ScalarTraits<APSInt>::output(const APSInt &S, void *,
llvm::raw_ostream &OS) {
- S.print(OS, true);
+ S.print(OS, S.isSigned());
}
StringRef ScalarTraits<APSInt>::input(StringRef Scalar, void *Ctx, APSInt &S) {
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 353d027f4e111..65703c6cf683a 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -47,14 +47,22 @@ static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
IO.mapOptional("Relocations", Section.Relocations);
}
+static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Name", Section.Name);
+ IO.mapOptional("FunctionNames", Section.FunctionNames);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Name", Section.Name);
+ IO.mapRequired("SymbolInfo", Section.SymbolInfos);
+}
+
static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Name", Section.Name);
- if (Section.Name == "name") {
- IO.mapOptional("FunctionNames", Section.FunctionNames);
- } else {
- IO.mapRequired("Payload", Section.Payload);
- }
+ IO.mapRequired("Payload", Section.Payload);
}
static void sectionMapping(IO &IO, WasmYAML::TypeSection &Section) {
@@ -121,11 +129,29 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
IO.mapRequired("Type", SectionType);
switch (SectionType) {
- case wasm::WASM_SEC_CUSTOM:
- if (!IO.outputting())
- Section.reset(new WasmYAML::CustomSection());
- sectionMapping(IO, *cast<WasmYAML::CustomSection>(Section.get()));
+ case wasm::WASM_SEC_CUSTOM: {
+ StringRef SectionName;
+ if (IO.outputting()) {
+ auto CustomSection = cast<WasmYAML::CustomSection>(Section.get());
+ SectionName = CustomSection->Name;
+ } else {
+ IO.mapRequired("Name", SectionName);
+ }
+ if (SectionName == "linking") {
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::LinkingSection());
+ sectionMapping(IO, *cast<WasmYAML::LinkingSection>(Section.get()));
+ } else if (SectionName == "name") {
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::NameSection());
+ sectionMapping(IO, *cast<WasmYAML::NameSection>(Section.get()));
+ } else {
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::CustomSection(SectionName));
+ sectionMapping(IO, *cast<WasmYAML::CustomSection>(Section.get()));
+ }
break;
+ }
case wasm::WASM_SEC_TYPE:
if (!IO.outputting())
Section.reset(new WasmYAML::TypeSection());
@@ -321,6 +347,12 @@ void MappingTraits<WasmYAML::DataSegment>::mapping(
IO.mapRequired("Content", Segment.Content);
}
+void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
+ WasmYAML::SymbolInfo &Info) {
+ IO.mapRequired("Name", Info.Name);
+ IO.mapRequired("Flags", Info.Flags);
+}
+
void ScalarEnumerationTraits<WasmYAML::ValueType>::enumeration(
IO &IO, WasmYAML::ValueType &Type) {
#define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X);
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 52a81ff0e1594..acb9e8d015bce 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -194,6 +194,37 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str,
return 0;
}
+// Returns true if one of the Prefixes + In.Name strings matches Option.
+static bool optionMatches(const OptTable::Info &In, StringRef Option) {
+ if (In.Values && In.Prefixes)
+ for (size_t I = 0; In.Prefixes[I]; I++)
+ if (Option == std::string(In.Prefixes[I]) + In.Name)
+ return true;
+ return false;
+}
+
+// This function is for flag value completion.
+// E.g. when "-stdlib=" and "l" are passed to this function, it returns the
+// appropriate values for stdlib that start with "l".
+std::vector<std::string>
+OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const {
+ // Search all options and return possible values.
+ for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) {
+ if (!optionMatches(In, Option))
+ continue;
+
+ SmallVector<StringRef, 8> Candidates;
+ StringRef(In.Values).split(Candidates, ",", -1, false);
+
+ std::vector<std::string> Result;
+ for (StringRef Val : Candidates)
+ if (Val.startswith(Arg))
+ Result.push_back(Val);
+ return Result;
+ }
+ return {};
+}
+
std::vector<std::string> OptTable::findByPrefix(StringRef Cur) const {
std::vector<std::string> Ret;
for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) {
@@ -336,6 +367,9 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
case Option::FlagClass:
break;
+ case Option::ValuesClass:
+ break;
+
case Option::SeparateClass: case Option::JoinedOrSeparateClass:
case Option::RemainingArgsClass: case Option::RemainingArgsJoinedClass:
Name += ' ';
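
A caller sketch for the new value-completion hook (the option table Opts and
the option spelling are hypothetical; a real entry would carry its candidate
list in the Values string, e.g. "libc++,libstdc++,platform"):

    std::vector<std::string> Suggestions =
        Opts.suggestValueCompletions("-stdlib=", "l");
    for (const std::string &S : Suggestions)
      llvm::outs() << S << "\n"; // e.g. libc++ and libstdc++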
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index 4832e659f026d..bf9f040bde525 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -47,6 +47,7 @@ void Option::print(raw_ostream &O) const {
P(UnknownClass);
P(FlagClass);
P(JoinedClass);
+ P(ValuesClass);
P(SeparateClass);
P(CommaJoinedClass);
P(MultiArgClass);
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index afd66f55720a5..78d5ea955e644 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -464,10 +464,15 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
if (RunProfileGen) {
MPM.addPass(PGOInstrumentationGen());
+ FunctionPassManager FPM;
+ FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
// Add the profile lowering pass.
InstrProfOptions Options;
if (!ProfileGenFile.empty())
Options.InstrProfileOutput = ProfileGenFile;
+ Options.DoCounterPromotion = true;
MPM.addPass(InstrProfiling(Options));
}
@@ -923,9 +928,6 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MainFPM.add(AlignmentFromAssumptionsPass());
#endif
- // FIXME: Conditionally run LoadCombine here, after it's ported
- // (in case we still have this pass, given its questionable usefulness).
-
// FIXME: add peephole extensions to the PM here.
MainFPM.addPass(InstCombinePass());
MainFPM.addPass(JumpThreadingPass());
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index 015b3c6c2021d..4534e086b39e2 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -1,4 +1,4 @@
-//===- CoverageMapping.cpp - Code coverage mapping support ------*- C++ -*-===//
+//===- CoverageMapping.cpp - Code coverage mapping support ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -200,6 +200,9 @@ Error CoverageMapping::loadFunctionRecord(
const CoverageMappingRecord &Record,
IndexedInstrProfReader &ProfileReader) {
StringRef OrigFuncName = Record.FunctionName;
+ if (OrigFuncName.empty())
+ return make_error<CoverageMapError>(coveragemap_error::malformed);
+
if (Record.Filenames.empty())
OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName);
else
@@ -300,8 +303,8 @@ namespace {
/// An instantiation set is a collection of functions that have the same source
/// code, i.e., template function specializations.
class FunctionInstantiationSetCollector {
- typedef DenseMap<std::pair<unsigned, unsigned>,
- std::vector<const FunctionRecord *>> MapT;
+ using MapT = DenseMap<std::pair<unsigned, unsigned>,
+ std::vector<const FunctionRecord *>>;
MapT InstantiatedFunctions;
public:
@@ -315,7 +318,6 @@ public:
}
MapT::iterator begin() { return InstantiatedFunctions.begin(); }
-
MapT::iterator end() { return InstantiatedFunctions.end(); }
};
diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index a34f359cd5427..fff0a03ccbe01 100644
--- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -1,4 +1,4 @@
-//===- CoverageMappingReader.cpp - Code coverage mapping reader -*- C++ -*-===//
+//===- CoverageMappingReader.cpp - Code coverage mapping reader -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -62,7 +62,7 @@ void CoverageMappingIterator::increment() {
}
Error RawCoverageReader::readULEB128(uint64_t &Result) {
- if (Data.size() < 1)
+ if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
unsigned N = 0;
Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
@@ -392,9 +392,9 @@ struct CovMapFuncRecordReader {
// A class for reading coverage mapping function records for a module.
template <CovMapVersion Version, class IntPtrT, support::endianness Endian>
class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader {
- typedef typename CovMapTraits<
- Version, IntPtrT>::CovMapFuncRecordType FuncRecordType;
- typedef typename CovMapTraits<Version, IntPtrT>::NameRefType NameRefType;
+ using FuncRecordType =
+ typename CovMapTraits<Version, IntPtrT>::CovMapFuncRecordType;
+ using NameRefType = typename CovMapTraits<Version, IntPtrT>::NameRefType;
// Maps function's name references to the indexes of their records
// in \c Records.
@@ -419,6 +419,8 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader {
StringRef FuncName;
if (Error Err = CFR->template getFuncName<Endian>(ProfileNames, FuncName))
return Err;
+ if (FuncName.empty())
+ return make_error<InstrProfError>(instrprof_error::malformed);
Records.emplace_back(Version, FuncName, FuncHash, Mapping, FilenamesBegin,
Filenames.size() - FilenamesBegin);
return Error::success();
@@ -574,7 +576,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
Endian = support::endianness::little;
Data = Data.substr(StringRef(TestingFormatMagic).size());
- if (Data.size() < 1)
+ if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
unsigned N = 0;
auto ProfileNamesSize =
@@ -582,7 +584,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
if (N > Data.size())
return make_error<CoverageMapError>(coveragemap_error::malformed);
Data = Data.substr(N);
- if (Data.size() < 1)
+ if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
N = 0;
uint64_t Address =
@@ -596,7 +598,7 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
return E;
CoverageMapping = Data.substr(ProfileNamesSize);
// Skip the padding bytes because coverage map data has an alignment of 8.
- if (CoverageMapping.size() < 1)
+ if (CoverageMapping.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
size_t Pad = alignmentAdjustment(CoverageMapping.data(), 8);
if (CoverageMapping.size() < Pad)
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index c9b82c303e338..005061c4f0680 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -330,14 +330,15 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName) {
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), PGOFuncName);
}
-void InstrProfSymtab::create(Module &M, bool InLTO) {
+Error InstrProfSymtab::create(Module &M, bool InLTO) {
for (Function &F : M) {
// Function may not have a name: like using asm("") to overwrite the name.
// Ignore in this case.
if (!F.hasName())
continue;
const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
- addFuncName(PGOFuncName);
+ if (Error E = addFuncName(PGOFuncName))
+ return E;
MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
// In ThinLTO, local function may have been promoted to global and have
// suffix added to the function name. We need to add the stripped function
@@ -346,13 +347,15 @@ void InstrProfSymtab::create(Module &M, bool InLTO) {
auto pos = PGOFuncName.find('.');
if (pos != std::string::npos) {
const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
- addFuncName(OtherFuncName);
+ if (Error E = addFuncName(OtherFuncName))
+ return E;
MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
}
}
}
finalizeSymtab();
+ return Error::success();
}
Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
@@ -447,7 +450,8 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
SmallVector<StringRef, 0> Names;
NameStrings.split(Names, getInstrProfNameSeparator());
for (StringRef &Name : Names)
- Symtab.addFuncName(Name);
+ if (Error E = Symtab.addFuncName(Name))
+ return E;
while (P < EndP && *P == 0)
P++;
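
Callers of InstrProfSymtab::create must now consume the returned Error; a
minimal sketch:

    InstrProfSymtab Symtab;
    if (Error E = Symtab.create(M, /*InLTO=*/false))
      return E; // A malformed name now surfaces here instead of being dropped.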
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index d9f599f400da5..1ed1fb8b6f0b5 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -200,7 +200,8 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
std::pair<StringRef, StringRef> VD = Line->rsplit(':');
uint64_t TakenCount, Value;
if (ValueKind == IPVK_IndirectCallTarget) {
- Symtab->addFuncName(VD.first);
+ if (Error E = Symtab->addFuncName(VD.first))
+ return E;
Value = IndexedInstrProf::ComputeHash(VD.first);
} else {
READ_NUM(VD.first, Value);
@@ -232,7 +233,8 @@ Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
// Read the function name.
Record.Name = *Line++;
- Symtab->addFuncName(Record.Name);
+ if (Error E = Symtab->addFuncName(Record.Name))
+ return E;
// Read the function hash.
if (Line.is_at_end())
@@ -482,8 +484,8 @@ InstrProfLookupTrait::ComputeHash(StringRef K) {
return IndexedInstrProf::ComputeHash(HashType, K);
}
-typedef InstrProfLookupTrait::data_type data_type;
-typedef InstrProfLookupTrait::offset_type offset_type;
+using data_type = InstrProfLookupTrait::data_type;
+using offset_type = InstrProfLookupTrait::offset_type;
bool InstrProfLookupTrait::readValueProfilingData(
const unsigned char *&D, const unsigned char *const End) {
@@ -620,7 +622,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
- llvm::SummaryEntryVector DetailedSummary;
+ SummaryEntryVector DetailedSummary;
for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
@@ -694,7 +696,9 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
return *Symtab.get();
std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
- Index->populateSymtab(*NewSymtab.get());
+ if (Error E = Index->populateSymtab(*NewSymtab.get())) {
+ consumeError(error(InstrProfError::take(std::move(E))));
+ }
Symtab = std::move(NewSymtab);
return *Symtab.get();
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index b3402a6ea956c..9efea78ed2a89 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -69,8 +69,7 @@ public:
write(P[K].D[I]);
}
} else {
- raw_string_ostream &SOStream =
- static_cast<llvm::raw_string_ostream &>(OS);
+ raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
std::string &Data = SOStream.str(); // with flush
for (int K = 0; K < NItems; K++) {
for (int I = 0; I < P[K].N; I++) {
@@ -91,14 +90,14 @@ public:
class InstrProfRecordWriterTrait {
public:
- typedef StringRef key_type;
- typedef StringRef key_type_ref;
+ using key_type = StringRef;
+ using key_type_ref = StringRef;
- typedef const InstrProfWriter::ProfilingData *const data_type;
- typedef const InstrProfWriter::ProfilingData *const data_type_ref;
+ using data_type = const InstrProfWriter::ProfilingData *const;
+ using data_type_ref = const InstrProfWriter::ProfilingData *const;
- typedef uint64_t hash_value_type;
- typedef uint64_t offset_type;
+ using hash_value_type = uint64_t;
+ using offset_type = uint64_t;
support::endianness ValueProfDataEndianness = support::little;
InstrProfSummaryBuilder *SummaryBuilder;
@@ -363,17 +362,19 @@ void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func,
OS << "\n";
}
-void InstrProfWriter::writeText(raw_fd_ostream &OS) {
+Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
if (ProfileKind == PF_IRLevel)
OS << "# IR level Instrumentation Flag\n:ir\n";
InstrProfSymtab Symtab;
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
- Symtab.addFuncName(I.getKey());
+ if (Error E = Symtab.addFuncName(I.getKey()))
+ return E;
Symtab.finalizeSymtab();
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
for (const auto &Func : I.getValue())
writeRecordInText(Func.second, Symtab, OS);
+ return Error::success();
}
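
Likewise writeText can now fail, so a caller sketch (Writer and OS are
illustrative) becomes:

    if (Error E = Writer.writeText(OS)) {
      logAllUnhandledErrors(std::move(E), errs(), "profile write failed: ");
      return 1;
    }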
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f36c25a0ce914..deb76cb565d1e 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -37,10 +37,6 @@
using namespace llvm;
-// TODO: Remove these and use APInt qualified types directly.
-typedef APInt::WordType integerPart;
-const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD;
-
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
@@ -51,7 +47,7 @@ const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD;
/* Assumed in hexadecimal significand parsing, and conversion to
hexadecimal strings. */
-static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
+static_assert(APFloatBase::integerPartWidth % 4 == 0,
+              "Part width must be divisible by 4!");
namespace llvm {
/* Represents floating point arithmetic semantics. */
@@ -153,8 +149,7 @@ namespace llvm {
const unsigned int maxExponent = 16383;
const unsigned int maxPrecision = 113;
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
- const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
- / (351 * integerPartWidth));
+ const unsigned int maxPowerOfFiveParts =
+     2 + ((maxPowerOfFiveExponent * 815) /
+          (351 * APFloatBase::integerPartWidth));
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
return semantics.precision;
@@ -180,7 +175,7 @@ namespace llvm {
static inline unsigned int
partCountForBits(unsigned int bits)
{
- return ((bits) + integerPartWidth - 1) / integerPartWidth;
+ return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
}
/* Returns 0U-9U. Return values >= 10U are not digits. */
@@ -420,7 +415,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
/* Return the fraction lost were a bignum truncated losing the least
significant BITS bits. */
static lostFraction
-lostFractionThroughTruncation(const integerPart *parts,
+lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
unsigned int partCount,
unsigned int bits)
{
@@ -433,7 +428,7 @@ lostFractionThroughTruncation(const integerPart *parts,
return lfExactlyZero;
if (bits == lsb + 1)
return lfExactlyHalf;
- if (bits <= partCount * integerPartWidth &&
+ if (bits <= partCount * APFloatBase::integerPartWidth &&
APInt::tcExtractBit(parts, bits - 1))
return lfMoreThanHalf;
@@ -442,7 +437,7 @@ lostFractionThroughTruncation(const integerPart *parts,
/* Shift DST right BITS bits noting lost fraction. */
static lostFraction
-shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
+shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
{
lostFraction lost_fraction;
@@ -489,22 +484,22 @@ HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
when the least significant BITS are truncated. BITS cannot be
zero. */
-static integerPart
-ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
-{
+static APFloatBase::integerPart
+ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
+ bool isNearest) {
unsigned int count, partBits;
- integerPart part, boundary;
+ APFloatBase::integerPart part, boundary;
assert(bits != 0);
bits--;
- count = bits / integerPartWidth;
- partBits = bits % integerPartWidth + 1;
+ count = bits / APFloatBase::integerPartWidth;
+ partBits = bits % APFloatBase::integerPartWidth + 1;
- part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
+ part = parts[count] & (~(APFloatBase::integerPart) 0 >>
+                        (APFloatBase::integerPartWidth - partBits));
if (isNearest)
- boundary = (integerPart) 1 << (partBits - 1);
+ boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
else
boundary = 0;
@@ -518,32 +513,30 @@ ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
if (part == boundary) {
while (--count)
if (parts[count])
- return ~(integerPart) 0; /* A lot. */
+ return ~(APFloatBase::integerPart) 0; /* A lot. */
return parts[0];
} else if (part == boundary - 1) {
while (--count)
if (~parts[count])
- return ~(integerPart) 0; /* A lot. */
+ return ~(APFloatBase::integerPart) 0; /* A lot. */
return -parts[0];
}
- return ~(integerPart) 0; /* A lot. */
+ return ~(APFloatBase::integerPart) 0; /* A lot. */
}
/* Place pow(5, power) in DST, and return the number of parts used.
DST must be at least one part larger than size of the answer. */
static unsigned int
-powerOf5(integerPart *dst, unsigned int power)
-{
- static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
- 15625, 78125 };
- integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
+powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
+ static const APFloatBase::integerPart firstEightPowers[] = {
+     1, 5, 25, 125, 625, 3125, 15625, 78125 };
+ APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
pow5s[0] = 78125 * 5;
unsigned int partsCount[16] = { 1 };
- integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
+ APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
unsigned int result;
assert(power <= maxExponent);
@@ -572,7 +565,7 @@ powerOf5(integerPart *dst, unsigned int power)
}
if (power & 1) {
- integerPart *tmp;
+ APFloatBase::integerPart *tmp;
APInt::tcFullMultiply(p2, p1, pow5, result, pc);
result += pc;
@@ -608,14 +601,14 @@ static const char NaNU[] = "NAN";
significant nibble. Write out exactly COUNT hexdigits, return
COUNT. */
static unsigned int
-partAsHex (char *dst, integerPart part, unsigned int count,
+partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
const char *hexDigitChars)
{
unsigned int result = count;
- assert(count != 0 && count <= integerPartWidth / 4);
+ assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
- part >>= (integerPartWidth - 4 * count);
+ part >>= (APFloatBase::integerPartWidth - 4 * count);
while (count--) {
dst[count] = hexDigitChars[part & 0xf];
part >>= 4;
@@ -889,11 +882,11 @@ unsigned int IEEEFloat::partCount() const {
return partCountForBits(semantics->precision + 1);
}
-const integerPart *IEEEFloat::significandParts() const {
+const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
return const_cast<IEEEFloat *>(this)->significandParts();
}
-integerPart *IEEEFloat::significandParts() {
+IEEEFloat::integerPart *IEEEFloat::significandParts() {
if (partCount() > 1)
return significand.parts;
else
@@ -916,7 +909,7 @@ void IEEEFloat::incrementSignificand() {
}
/* Add the significand of the RHS. Returns the carry flag. */
-integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
+IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
integerPart *parts;
parts = significandParts();
@@ -929,8 +922,8 @@ integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
/* Subtract the significand of the RHS with a borrow flag. Returns
the borrow flag. */
-integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
- integerPart borrow) {
+IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
+ integerPart borrow) {
integerPart *parts;
parts = significandParts();
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index e9716e3b1e872..c558ddd82161d 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -546,10 +546,7 @@ unsigned APInt::countLeadingZerosSlowCase() const {
return Count;
}
-unsigned APInt::countLeadingOnes() const {
- if (isSingleWord())
- return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
-
+unsigned APInt::countLeadingOnesSlowCase() const {
unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
unsigned shift;
if (!highWordBits) {
@@ -573,9 +570,7 @@ unsigned APInt::countLeadingOnes() const {
return Count;
}
-unsigned APInt::countTrailingZeros() const {
- if (isSingleWord())
- return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
+unsigned APInt::countTrailingZerosSlowCase() const {
unsigned Count = 0;
unsigned i = 0;
for (; i < getNumWords() && U.pVal[i] == 0; ++i)
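
The *SlowCase renames suggest the single-word fast paths moved into the header
as inline dispatchers, presumably along these lines (a sketch reconstructed
from the removed lines, not the actual header diff):

    unsigned APInt::countLeadingOnes() const {
      if (isSingleWord()) // Fast path: a single word needs no loop.
        return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
      return countLeadingOnesSlowCase();
    }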
diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp
index bfb658cfa0b74..e00527f2519e1 100644
--- a/lib/Support/BinaryStreamReader.cpp
+++ b/lib/Support/BinaryStreamReader.cpp
@@ -109,6 +109,12 @@ Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref, uint32_t Length) {
return Error::success();
}
+Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Stream,
+ uint32_t Size) {
+ Stream.Offset = getOffset();
+ return readStreamRef(Stream.StreamData, Size);
+}
+
Error BinaryStreamReader::skip(uint32_t Amount) {
if (Amount > bytesRemaining())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
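
A usage sketch for the new readSubstream helper (the stream setup and Length
are illustrative):

    BinaryStreamReader Reader(Stream);
    BinarySubstreamRef Contents;
    if (Error E = Reader.readSubstream(Contents, Length))
      return E;
    // Contents.Offset records where the substream began; Contents.StreamData
    // is a BinaryStreamRef covering exactly Length bytes.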
diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp
index aca1236395655..60d0964f27646 100644
--- a/lib/Support/CachePruning.cpp
+++ b/lib/Support/CachePruning.cpp
@@ -82,7 +82,7 @@ llvm::parseCachePruningPolicy(StringRef PolicyStr) {
if (Value.back() != '%')
return make_error<StringError>("'" + Value + "' must be a percentage",
inconvertibleErrorCode());
- StringRef SizeStr = Value.slice(0, Value.size() - 1);
+ StringRef SizeStr = Value.drop_back();
uint64_t Size;
if (SizeStr.getAsInteger(0, Size))
return make_error<StringError>("'" + SizeStr + "' not an integer",
@@ -91,7 +91,28 @@ llvm::parseCachePruningPolicy(StringRef PolicyStr) {
return make_error<StringError>("'" + SizeStr +
"' must be between 0 and 100",
inconvertibleErrorCode());
- Policy.PercentageOfAvailableSpace = Size;
+ Policy.MaxSizePercentageOfAvailableSpace = Size;
+ } else if (Key == "cache_size_bytes") {
+ uint64_t Mult = 1;
+ switch (tolower(Value.back())) {
+ case 'k':
+ Mult = 1024;
+ Value = Value.drop_back();
+ break;
+ case 'm':
+ Mult = 1024 * 1024;
+ Value = Value.drop_back();
+ break;
+ case 'g':
+ Mult = 1024 * 1024 * 1024;
+ Value = Value.drop_back();
+ break;
+ }
+ uint64_t Size;
+ if (Value.getAsInteger(0, Size))
+ return make_error<StringError>("'" + Value + "' not an integer",
+ inconvertibleErrorCode());
+ Policy.MaxSizeBytes = Size * Mult;
} else {
return make_error<StringError>("Unknown key: '" + Key + "'",
inconvertibleErrorCode());
@@ -115,11 +136,12 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
if (!isPathDir)
return false;
- Policy.PercentageOfAvailableSpace =
- std::min(Policy.PercentageOfAvailableSpace, 100u);
+ Policy.MaxSizePercentageOfAvailableSpace =
+ std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u);
if (Policy.Expiration == seconds(0) &&
- Policy.PercentageOfAvailableSpace == 0) {
+ Policy.MaxSizePercentageOfAvailableSpace == 0 &&
+ Policy.MaxSizeBytes == 0) {
DEBUG(dbgs() << "No pruning settings set, exit early\n");
// Nothing will be pruned, early exit
return false;
@@ -157,7 +179,8 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
writeTimestampFile(TimestampFile);
}
- bool ShouldComputeSize = (Policy.PercentageOfAvailableSpace > 0);
+ bool ShouldComputeSize =
+ (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0);
// Keep track of space
std::set<std::pair<uint64_t, std::string>> FileSizes;
@@ -216,14 +239,22 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
}
sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get();
auto AvailableSpace = TotalSize + SpaceInfo.free;
- auto FileAndSize = FileSizes.rbegin();
+
+ if (Policy.MaxSizePercentageOfAvailableSpace == 0)
+ Policy.MaxSizePercentageOfAvailableSpace = 100;
+ if (Policy.MaxSizeBytes == 0)
+ Policy.MaxSizeBytes = AvailableSpace;
+ auto TotalSizeTarget = std::min<uint64_t>(
+ AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull,
+ Policy.MaxSizeBytes);
+
DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace)
- << "% target is: " << Policy.PercentageOfAvailableSpace
- << "\n");
+ << "% target is: " << Policy.MaxSizePercentageOfAvailableSpace
+ << "%, " << Policy.MaxSizeBytes << " bytes\n");
+
+ auto FileAndSize = FileSizes.rbegin();
// Remove the oldest accessed files first, till we get below the threshold
- while (((100 * TotalSize) / AvailableSpace) >
- Policy.PercentageOfAvailableSpace &&
- FileAndSize != FileSizes.rend()) {
+ while (TotalSize > TotalSizeTarget && FileAndSize != FileSizes.rend()) {
// Remove the file.
sys::fs::remove(FileAndSize->second);
// Update size
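
With the new key, a policy can cap the cache by absolute size as well as by
percentage of available space. A hedged usage sketch (CacheDir is
illustrative, and parseCachePruningPolicy is assumed to return an Expected
policy, as the make_error calls above imply):

    // "2g" caps the cache at 2 GiB; 'k', 'm' and 'g' suffixes scale by 1024.
    auto PolicyOrErr = parseCachePruningPolicy("cache_size_bytes=2g");
    if (!PolicyOrErr)
      return PolicyOrErr.takeError();
    pruneCache(CacheDir, *PolicyOrErr); // Evicts oldest files over the cap.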
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index de0ca940b405f..0345a5e3d2a1d 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
@@ -1522,13 +1523,9 @@ bool parser<unsigned long long>::parse(Option &O, StringRef ArgName,
// parser<double>/parser<float> implementation
//
static bool parseDouble(Option &O, StringRef Arg, double &Value) {
- SmallString<32> TmpStr(Arg.begin(), Arg.end());
- const char *ArgStart = TmpStr.c_str();
- char *End;
- Value = strtod(ArgStart, &End);
- if (*End != 0)
- return O.error("'" + Arg + "' value invalid for floating point argument!");
- return false;
+ if (to_float(Arg, Value))
+ return false;
+ return O.error("'" + Arg + "' value invalid for floating point argument!");
}
bool parser<double>::parse(Option &O, StringRef ArgName, StringRef Arg,
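
Both float parsers now lean on llvm::to_float from StringExtras.h, which
rejects trailing garbage itself. In isolation:

    double Value;
    bool Ok = llvm::to_float("3.25", Value);     // true, Value == 3.25
    bool Bad = llvm::to_float("3.25abc", Value); // false: trailing characters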
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 53c10bcc562e3..0199b300ba72d 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -68,6 +68,13 @@ uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst,
Data.data());
}
+uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const {
+ uint24_t ExtractedVal =
+ getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data());
+ // The 3 bytes are in the correct byte order for the host.
+ return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
+}
+
uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const {
return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data());
}
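
A short sketch of the new 24-bit accessor (the buffer contents are
illustrative):

    const char Bytes[] = "\x56\x34\x12";
    DataExtractor DE(StringRef(Bytes, 3), /*IsLittleEndian=*/true,
                     /*AddressSize=*/4);
    uint32_t Offset = 0;
    uint32_t V = DE.getU24(&Offset); // V == 0x123456, Offset advances to 3.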
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index f70b77da8de47..e04bd8bb3b9a1 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -1,4 +1,4 @@
-//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===//
+//===- GraphWriter.cpp - Implements GraphWriter support routines ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,10 +12,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/GraphWriter.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <string>
+#include <system_error>
+#include <vector>
+
using namespace llvm;
static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
@@ -99,8 +111,10 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args,
}
namespace {
+
struct GraphSession {
std::string LogBuffer;
+
bool TryFindProgram(StringRef Names, std::string &ProgramPath) {
raw_string_ostream Log(LogBuffer);
SmallVector<StringRef, 8> parts;
@@ -115,7 +129,8 @@ struct GraphSession {
return false;
}
};
-} // namespace
+
+} // end anonymous namespace
static const char *getProgramName(GraphProgram::Name program) {
switch (program) {
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 320aede79fbb0..2687a67556d3e 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -174,6 +174,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
+ case Ananas: return "ananas";
case CloudABI: return "cloudabi";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
@@ -455,6 +456,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
static Triple::OSType parseOS(StringRef OSName) {
return StringSwitch<Triple::OSType>(OSName)
+ .StartsWith("ananas", Triple::Ananas)
.StartsWith("cloudabi", Triple::CloudABI)
.StartsWith("darwin", Triple::Darwin)
.StartsWith("dragonfly", Triple::DragonFly)
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index 457217125a222..0ba6a25aa198d 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -45,5 +45,11 @@ std::string sys::getDefaultTargetTriple() {
TargetTripleString += getOSVersion();
}
+ // Override the default target with an environment variable named by
+ // LLVM_TARGET_TRIPLE_ENV.
+#if defined(LLVM_TARGET_TRIPLE_ENV)
+ if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV))
+ TargetTripleString = EnvTriple;
+#endif
+
return Triple::normalize(TargetTripleString);
}
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index edbc7938f0cbf..dd39ef935bf92 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -195,6 +195,10 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC,
flags, fd, 0);
+#elif defined(__NetBSD__) && defined(PROT_MPROTECT)
+ void *pa =
+ ::mmap(start, PageSize * NumPages,
+ PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC), flags, fd, 0);
#else
void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
flags, fd, 0);
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 2df0eaff47e52..1704fa4799428 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -449,11 +449,22 @@ bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<co
size_t ArgLength = Program.size() + 1;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
- ArgLength += strlen(*I) + 1;
+ size_t length = strlen(*I);
+
+ // Ensure that we do not exceed Linux's MAX_ARG_STRLEN limit on the length
+ // of a single argument; despite what the man pages suggest, it is not
+ // exposed as a header constant. Since this limit is pretty high, perform
+ // the check unconditionally rather than trying to be aggressive and
+ // limiting it to Linux only.
+ if (length >= (32 * 4096))
+ return false;
+
+ ArgLength += length + 1;
if (ArgLength > size_t(HalfArgMax)) {
return false;
}
}
+
return true;
}
}
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
index fe89fe0aad8c4..7e196cf0ce18a 100644
--- a/lib/Support/Windows/Host.inc
+++ b/lib/Support/Windows/Host.inc
@@ -18,5 +18,13 @@
using namespace llvm;
std::string sys::getDefaultTargetTriple() {
- return Triple::normalize(LLVM_DEFAULT_TARGET_TRIPLE);
+ const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE;
+
+ // Override the default target with an environment variable named by
+ // LLVM_TARGET_TRIPLE_ENV.
+#if defined(LLVM_TARGET_TRIPLE_ENV)
+ if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV))
+ Triple = EnvTriple;
+#endif
+
+ return Triple::normalize(Triple);
}
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 01ae3214453dc..e2f21a56a810a 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -1,4 +1,4 @@
-//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
+//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,15 +13,29 @@
#include "llvm/Support/YAMLParser.h"
#include "llvm/ADT/AllocatorList.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
using namespace llvm;
using namespace yaml;
@@ -37,7 +51,7 @@ enum UnicodeEncodingForm {
/// EncodingInfo - Holds the encoding type and length of the byte order mark if
/// it exists. Length is in {0, 2, 3, 4}.
-typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
+using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
/// encoding form of \a Input.
@@ -46,7 +60,7 @@ typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
/// @returns An EncodingInfo indicating the Unicode encoding form of the input
/// and how long the byte order mark is if one exists.
static EncodingInfo getUnicodeEncoding(StringRef Input) {
- if (Input.size() == 0)
+ if (Input.empty())
return std::make_pair(UEF_Unknown, 0);
switch (uint8_t(Input[0])) {
@@ -95,8 +109,6 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) {
return std::make_pair(UEF_UTF8, 0);
}
-namespace llvm {
-namespace yaml {
/// Pin the vtables to this file.
void Node::anchor() {}
void NullNode::anchor() {}
@@ -107,6 +119,9 @@ void MappingNode::anchor() {}
void SequenceNode::anchor() {}
void AliasNode::anchor() {}
+namespace llvm {
+namespace yaml {
+
/// Token - A single YAML token.
struct Token {
enum TokenKind {
@@ -133,7 +148,7 @@ struct Token {
TK_Alias,
TK_Anchor,
TK_Tag
- } Kind;
+ } Kind = TK_Error;
/// A string of length 0 or more whose begin() points to the logical location
/// of the token in the input.
@@ -142,14 +157,16 @@ struct Token {
/// The value of a block scalar node.
std::string Value;
- Token() : Kind(TK_Error) {}
+ Token() = default;
};
-}
-}
-typedef llvm::BumpPtrList<Token> TokenQueueT;
+} // end namespace yaml
+} // end namespace llvm
+
+using TokenQueueT = BumpPtrList<Token>;
namespace {
+
/// @brief This struct is used to track simple keys.
///
/// Simple keys are handled by creating an entry in SimpleKeys for each Token
@@ -170,12 +187,13 @@ struct SimpleKey {
return Tok == Other.Tok;
}
};
-}
+
+} // end anonymous namespace
/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
/// subsequence and the subsequence's length in code units (uint8_t).
/// A length of 0 represents an error.
-typedef std::pair<uint32_t, unsigned> UTF8Decoded;
+using UTF8Decoded = std::pair<uint32_t, unsigned>;
static UTF8Decoded decodeUTF8(StringRef Range) {
StringRef::iterator Position= Range.begin();
@@ -229,6 +247,7 @@ static UTF8Decoded decodeUTF8(StringRef Range) {
namespace llvm {
namespace yaml {
+
/// @brief Scans YAML tokens from a MemoryBuffer.
class Scanner {
public:
@@ -350,7 +369,8 @@ private:
/// ns-char.
StringRef::iterator skip_ns_char(StringRef::iterator Position);
- typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
+ using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
+
/// @brief Skip minimal well-formed code unit subsequences until Func
/// returns its input.
///
@@ -655,10 +675,10 @@ bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
}
bool yaml::scanTokens(StringRef Input) {
- llvm::SourceMgr SM;
- llvm::yaml::Scanner scanner(Input, SM);
- for (;;) {
- llvm::yaml::Token T = scanner.getNext();
+ SourceMgr SM;
+ Scanner scanner(Input, SM);
+ while (true) {
+ Token T = scanner.getNext();
if (T.Kind == Token::TK_StreamEnd)
break;
else if (T.Kind == Token::TK_Error)
@@ -1744,7 +1764,7 @@ Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
std::error_code *EC)
: scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
-Stream::~Stream() {}
+Stream::~Stream() = default;
bool Stream::failed() { return scanner->failed(); }
@@ -1851,8 +1871,6 @@ bool Node::failed() const {
return Doc->failed();
}
-
-
StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
// TODO: Handle newlines properly. We need to remove leading whitespace.
if (Value[0] == '"') { // Double quoted.
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index c410b1d560860..601084f9eae3c 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -8,17 +8,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
-#include <cctype>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
#include <cstring>
+#include <string>
+#include <vector>
+
using namespace llvm;
using namespace yaml;
@@ -26,11 +36,9 @@ using namespace yaml;
// IO
//===----------------------------------------------------------------------===//
-IO::IO(void *Context) : Ctxt(Context) {
-}
+IO::IO(void *Context) : Ctxt(Context) {}
-IO::~IO() {
-}
+IO::~IO() = default;
void *IO::getContext() {
return Ctxt;
@@ -46,15 +54,13 @@ void IO::setContext(void *Context) {
Input::Input(StringRef InputContent, void *Ctxt,
SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt)
- : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)),
- CurrentNode(nullptr) {
+ : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)) {
if (DiagHandler)
SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt);
DocIterator = Strm->begin();
}
-Input::~Input() {
-}
+Input::~Input() = default;
std::error_code Input::error() { return EC; }
@@ -398,13 +404,9 @@ bool Input::canElideEmptySequence() {
//===----------------------------------------------------------------------===//
Output::Output(raw_ostream &yout, void *context, int WrapColumn)
- : IO(context), Out(yout), WrapColumn(WrapColumn), Column(0),
- ColumnAtFlowStart(0), ColumnAtMapFlowStart(0), NeedBitValueComma(false),
- NeedFlowSequenceComma(false), EnumerationMatchFound(false),
- NeedsNewLine(false), WriteDefaultValues(false) {}
+ : IO(context), Out(yout), WrapColumn(WrapColumn) {}
-Output::~Output() {
-}
+Output::~Output() = default;
bool Output::outputting() {
return true;
@@ -911,12 +913,9 @@ void ScalarTraits<double>::output(const double &Val, void *, raw_ostream &Out) {
}
StringRef ScalarTraits<double>::input(StringRef Scalar, void *, double &Val) {
- SmallString<32> buff(Scalar.begin(), Scalar.end());
- char *end;
- Val = strtod(buff.c_str(), &end);
- if (*end != '\0')
- return "invalid floating point number";
- return StringRef();
+ if (to_float(Scalar, Val))
+ return StringRef();
+ return "invalid floating point number";
}
void ScalarTraits<float>::output(const float &Val, void *, raw_ostream &Out) {
@@ -924,12 +923,9 @@ void ScalarTraits<float>::output(const float &Val, void *, raw_ostream &Out) {
}
StringRef ScalarTraits<float>::input(StringRef Scalar, void *, float &Val) {
- SmallString<32> buff(Scalar.begin(), Scalar.end());
- char *end;
- Val = strtod(buff.c_str(), &end);
- if (*end != '\0')
- return "invalid floating point number";
- return StringRef();
+ if (to_float(Scalar, Val))
+ return StringRef();
+ return "invalid floating point number";
}
void ScalarTraits<Hex8>::output(const Hex8 &Val, void *, raw_ostream &Out) {
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 1abc8ed8683d5..9480cd46d28fc 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -548,7 +548,11 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
pos += Size;
#ifndef LLVM_ON_WIN32
+#if defined(__linux__)
+ bool ShouldWriteInChunks = true;
+#else
bool ShouldWriteInChunks = false;
+#endif
#else
// Writing a large size of output to Windows console returns ENOMEM. It seems
// that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index 3e0e3978b90b5..37b9690d0434a 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -31,6 +31,7 @@ class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
+FunctionPass *createAArch64CondBrTuning();
FunctionPass *createAArch64ConditionalCompares();
FunctionPass *createAArch64AdvSIMDScalar();
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
@@ -55,6 +56,7 @@ void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
void initializeAArch64CollectLOHPass(PassRegistry&);
+void initializeAArch64CondBrTuningPass(PassRegistry &);
void initializeAArch64ConditionalComparesPass(PassRegistry&);
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
new file mode 100644
index 0000000000000..f27bc97ec3f3e
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -0,0 +1,336 @@
+//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions
+/// into a conditional branch (B.cond), when the NZCV flags can be set for
+/// "free". This is preferred on targets that have more flexibility when
+/// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming
+/// all other variables are equal). This can also reduce register pressure.
+///
+/// A few examples:
+///
+/// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
+/// cbz w8, .LBB_2 -> b.eq .LBB0_2
+///
+/// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
+/// cbz w8, .LBB1_2 -> b.eq .LBB1_2
+///
+/// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
+/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
+///
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-cond-br-tuning"
+#define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning"
+
+namespace {
+class AArch64CondBrTuning : public MachineFunctionPass {
+ const AArch64InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ MachineRegisterInfo *MRI;
+
+public:
+ static char ID;
+ AArch64CondBrTuning() : MachineFunctionPass(ID) {
+ initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; }
+
+private:
+ MachineInstr *getOperandDef(const MachineOperand &MO);
+ MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting);
+ MachineInstr *convertToCondBr(MachineInstr &MI);
+ bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
+};
+} // end anonymous namespace
+
+char AArch64CondBrTuning::ID = 0;
+
+INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning",
+ AARCH64_CONDBR_TUNING_NAME, false, false)
+
+void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
+ if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ return nullptr;
+ return MRI->getUniqueVRegDef(MO.getReg());
+}
+
+MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
+ bool IsFlagSetting) {
+ // If this is already the flag-setting version of the instruction (e.g.,
+ // SUBS), just make sure the implicit-def of NZCV isn't marked dead.
+ if (IsFlagSetting) {
+ for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands();
+ I != E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV)
+ MO.setIsDead(false);
+ }
+ return &MI;
+ }
+ bool Is64Bit;
+ unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
+ unsigned NewDestReg = MI.getOperand(0).getReg();
+ if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
+ NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ TII->get(NewOpc), NewDestReg);
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+ MIB.add(MI.getOperand(I));
+
+ return MIB;
+}
+
+MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
+ AArch64CC::CondCode CC;
+ MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI);
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ CC = AArch64CC::EQ;
+ break;
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ CC = AArch64CC::NE;
+ break;
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ CC = AArch64CC::GE;
+ break;
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ CC = AArch64CC::LT;
+ break;
+ }
+ return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TargetMBB);
+}
+
+bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
+ MachineInstr &DefMI) {
+ // We don't want NZCV bits live across blocks.
+ if (MI.getParent() != DefMI.getParent())
+ return false;
+
+ bool IsFlagSetting = true;
+ unsigned MIOpc = MI.getOpcode();
+ MachineInstr *NewCmp = nullptr, *NewBr = nullptr;
+ switch (DefMI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::ADDWri:
+ case AArch64::ADDWrr:
+ case AArch64::ADDWrs:
+ case AArch64::ADDWrx:
+ case AArch64::ANDWri:
+ case AArch64::ANDWrr:
+ case AArch64::ANDWrs:
+ case AArch64::BICWrr:
+ case AArch64::BICWrs:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBWrs:
+ case AArch64::SUBWrx:
+ IsFlagSetting = false;
+ LLVM_FALLTHROUGH;
+ case AArch64::ADDSWri:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSWrx:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSWrs:
+ case AArch64::BICSWrr:
+ case AArch64::BICSWrs:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSWrx:
+ switch (MIOpc) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ case AArch64::TBZW:
+ case AArch64::TBNZW:
+ // Check to see if the TBZ/TBNZ is checking the sign bit.
+ if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) &&
+ MI.getOperand(1).getImm() != 31)
+ return false;
+
+ // There must not be any instruction between DefMI and MI that clobbers or
+ // reads NZCV.
+ MachineBasicBlock::iterator I(DefMI), E(MI);
+ for (I = std::next(I); I != E; ++I) {
+ if (I->modifiesRegister(AArch64::NZCV, TRI) ||
+ I->readsRegister(AArch64::NZCV, TRI))
+ return false;
+ }
+ DEBUG(dbgs() << " Replacing instructions:\n ");
+ DEBUG(DefMI.print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(MI.print(dbgs()));
+
+ NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+ NewBr = convertToCondBr(MI);
+ break;
+ }
+ break;
+
+ case AArch64::ADDXri:
+ case AArch64::ADDXrr:
+ case AArch64::ADDXrs:
+ case AArch64::ADDXrx:
+ case AArch64::ANDXri:
+ case AArch64::ANDXrr:
+ case AArch64::ANDXrs:
+ case AArch64::BICXrr:
+ case AArch64::BICXrs:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ case AArch64::SUBXrs:
+ case AArch64::SUBXrx:
+ IsFlagSetting = false;
+ LLVM_FALLTHROUGH;
+ case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXrs:
+ case AArch64::ADDSXrx:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::ANDSXrs:
+ case AArch64::BICSXrr:
+ case AArch64::BICSXrs:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSXrs:
+ case AArch64::SUBSXrx:
+ switch (MIOpc) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
+ case AArch64::TBZX:
+ case AArch64::TBNZX: {
+ // Check to see if the TBZ/TBNZ is checking the sign bit.
+ if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) &&
+ MI.getOperand(1).getImm() != 63)
+ return false;
+ // There must not be any instruction between DefMI and MI that clobbers or
+ // reads NZCV.
+ MachineBasicBlock::iterator I(DefMI), E(MI);
+ for (I = std::next(I); I != E; ++I) {
+ if (I->modifiesRegister(AArch64::NZCV, TRI) ||
+ I->readsRegister(AArch64::NZCV, TRI))
+ return false;
+ }
+ DEBUG(dbgs() << " Replacing instructions:\n ");
+ DEBUG(DefMI.print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(MI.print(dbgs()));
+
+ NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
+ NewBr = convertToCondBr(MI);
+ break;
+ }
+ }
+ break;
+ }
+ assert(NewCmp && NewBr && "Expected new instructions.");
+
+ DEBUG(dbgs() << " with instruction:\n ");
+ DEBUG(NewCmp->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(NewBr->print(dbgs()));
+
+ // If DefMI was already the flag-setting version of the instruction, we
+ // reused it above by simply clearing the dead flag on the implicit-def of
+ // NZCV, so it must not be erased.
+ if (!IsFlagSetting)
+ DefMI.eraseFromParent();
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ bool LocalChange = false;
+ for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
+ E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ MachineInstr *DefMI = getOperandDef(MI.getOperand(0));
+ LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI));
+ break;
+ }
+ // If the optimization was successful, we can't optimize any other
+ // branches because doing so would clobber the NZCV flags.
+ if (LocalChange) {
+ Changed = true;
+ break;
+ }
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64CondBrTuning() {
+ return new AArch64CondBrTuning();
+}
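
To make the rewrite above concrete, a small sketch follows; the source
function, register names, and MIR-style comments are illustrative, not
taken from this patch:

extern int a(), b();
int g(int x) {
  int t = x - 1;              // lowers to a SUBWri feeding the branch
  return t == 0 ? a() : b();  // lowers to a CBZW on the SUBWri result
}
// Before tuning:  %w8 = SUBWri %w0, 1, 0
//                 CBZW %w8, %bb.then
// After tuning:   %wzr = SUBSWri %w0, 1, 0  ; implicit-def NZCV
//                 Bcc EQ, %bb.then          ; reads NZCV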
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 544f67433fd53..ee54550c9900b 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -13,7 +13,9 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -84,6 +86,51 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
continue;
}
+ if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
+ // XZR/WZR for LSE can only be used when acquire semantics are not
+ // required; LDOPAL WZR is an invalid opcode.
+ switch (MI.getOpcode()) {
+ case AArch64::CASALb:
+ case AArch64::CASALh:
+ case AArch64::CASALs:
+ case AArch64::CASALd:
+ case AArch64::SWPALb:
+ case AArch64::SWPALh:
+ case AArch64::SWPALs:
+ case AArch64::SWPALd:
+ case AArch64::LDADDALb:
+ case AArch64::LDADDALh:
+ case AArch64::LDADDALs:
+ case AArch64::LDADDALd:
+ case AArch64::LDEORALb:
+ case AArch64::LDEORALh:
+ case AArch64::LDEORALs:
+ case AArch64::LDEORALd:
+ case AArch64::LDSETALb:
+ case AArch64::LDSETALh:
+ case AArch64::LDSETALs:
+ case AArch64::LDSETALd:
+ case AArch64::LDSMINALb:
+ case AArch64::LDSMINALh:
+ case AArch64::LDSMINALs:
+ case AArch64::LDSMINALd:
+ case AArch64::LDSMAXALb:
+ case AArch64::LDSMAXALh:
+ case AArch64::LDSMAXALs:
+ case AArch64::LDSMAXALd:
+ case AArch64::LDUMINALb:
+ case AArch64::LDUMINALh:
+ case AArch64::LDUMINALs:
+ case AArch64::LDUMINALd:
+ case AArch64::LDUMAXALb:
+ case AArch64::LDUMAXALh:
+ case AArch64::LDUMAXALs:
+ case AArch64::LDUMAXALd:
+ continue;
+ default:
+ break;
+ }
+ }
const MCInstrDesc &Desc = MI.getDesc();
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
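
A short sketch of why these opcodes are skipped; the source below is
hypothetical and the -march flag is an assumption:

#include <atomic>

// Assuming an LSE-capable subtarget (e.g. -march=armv8.1-a), this
// seq_cst fetch_add selects LDADDALs. Even though the loaded result is
// unused here, its destination must not be rewritten to WZR: the
// acquire (AL) forms do not accept the zero register, which is exactly
// what the switch above guards against.
void bump(std::atomic<int> &Counter) {
  Counter.fetch_add(1, std::memory_order_seq_cst);
}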
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 8c2c0a564c302..04687847c1a30 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -201,7 +201,7 @@ private:
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
- void SelectCMP_SWAP(SDNode *N);
+ bool SelectCMP_SWAP(SDNode *N);
};
} // end anonymous namespace
@@ -2609,9 +2609,13 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
}
/// We've got special pseudo-instructions for these
-void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
+bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
+
+ // Leave the node to the LSE patterns if the subtarget supports them.
+ if (Subtarget->hasLSE()) return false;
+
if (MemTy == MVT::i8)
Opcode = AArch64::CMP_SWAP_8;
else if (MemTy == MVT::i16)
@@ -2637,6 +2641,8 @@ void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
CurDAG->RemoveDeadNode(N);
+
+ return true;
}
void AArch64DAGToDAGISel::Select(SDNode *Node) {
@@ -2660,8 +2666,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::ATOMIC_CMP_SWAP:
- SelectCMP_SWAP(Node);
- return;
+ if (SelectCMP_SWAP(Node))
+ return;
+ break;
case ISD::READ_REGISTER:
if (tryReadRegister(Node))
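
The effect of the new bool return can be sketched at the source level
(hypothetical code, assuming an LSE subtarget):

#include <atomic>

// With LSE, SelectCMP_SWAP() now returns false, the ATOMIC_CMP_SWAP
// node falls through to ordinary pattern selection, and the CASAL
// patterns added in AArch64InstrAtomics.td below match it instead of
// the CMP_SWAP_* pseudos.
bool tryLock(std::atomic<int> &L) {
  int Expected = 0;
  return L.compare_exchange_strong(Expected, 1,
                                   std::memory_order_seq_cst);
}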
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 083ca2156598f..2965106fd2708 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10563,11 +10563,20 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
+ if (Size > 128)
+   return AtomicExpansionKind::None;
+ // Nand is not supported in LSE.
+ if (AI->getOperation() == AtomicRMWInst::Nand)
+   return AtomicExpansionKind::LLSC;
+ // Currently leave And and Sub to LL/SC.
+ if (AI->getOperation() == AtomicRMWInst::And ||
+     AI->getOperation() == AtomicRMWInst::Sub)
+   return AtomicExpansionKind::LLSC;
+ // Leave 128-bit operations to LL/SC.
+ return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None
+                                            : AtomicExpansionKind::LLSC;
}
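
The resulting expansion policy, summarized with a hypothetical example
(assuming an LSE subtarget):

#include <atomic>

void policy(std::atomic<int> &A) {
  A.fetch_add(1); // AtomicExpansionKind::None, selects LDADDALs
  A.fetch_or(1);  // AtomicExpansionKind::None, selects LDSETALs
  A.fetch_and(1); // And is still expanded to an LL/SC loop
  A.fetch_sub(1); // Sub is still expanded to an LL/SC loop
  // Nand and 128-bit wide operations likewise stay on the LL/SC path.
}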
bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
+ // If the subtarget has LSE, leave cmpxchg intact for codegen.
+ if (Subtarget->hasLSE()) return false;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index 71826bec6b11f..de283b70210fe 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -405,3 +405,49 @@ def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
GPR64:$newLo, GPR64:$newHi), []>,
Sched<[WriteAtomic]>;
+
+// v8.1 Atomic instructions:
+def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALb GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALh GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALs GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALd GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>;
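
As a usage sketch (hypothetical source, assuming an LSE target), an
atomic exchange now selects one of the SWP patterns above rather than
an LL/SC loop:

#include <atomic>

int take(std::atomic<int> &Flag) {
  // atomic_swap_32 matches the SWPALs pattern above.
  return Flag.exchange(1, std::memory_order_seq_cst);
}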
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index eea012382150c..314e89bbca863 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1036,7 +1036,7 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) {
/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
-static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
+static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
// Don't convert all compare instructions, because for some the zero register
// encoding becomes the sp register.
bool MIDefinesZeroReg = false;
@@ -1145,7 +1145,7 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return true;
}
unsigned Opc = CmpInstr.getOpcode();
- unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
+ unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
if (NewOpc == Opc)
return false;
const MCInstrDesc &MCID = get(NewOpc);
@@ -3318,7 +3318,7 @@ static bool getMaddPatterns(MachineInstr &Root,
// When NZCV is live bail out.
if (Cmp_NZCV == -1)
return false;
- unsigned NewOpc = convertFlagSettingOpcode(Root);
+ unsigned NewOpc = convertToNonFlagSettingOpc(Root);
// When opcode can't change bail out.
// CHECKME: do we miss any cases for opcode conversion?
if (NewOpc == Opc)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 59f3405fe439a..58e9ce583d44c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -119,6 +119,44 @@ public:
}
}
+ /// \brief Return the opcode that sets flags when possible. The caller is
+ /// responsible for ensuring the opcode has a flag-setting equivalent.
+ static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Opcode has no flag setting equivalent!");
+ // 32-bit cases:
+ case AArch64::ADDWri: Is64Bit = false; return AArch64::ADDSWri;
+ case AArch64::ADDWrr: Is64Bit = false; return AArch64::ADDSWrr;
+ case AArch64::ADDWrs: Is64Bit = false; return AArch64::ADDSWrs;
+ case AArch64::ADDWrx: Is64Bit = false; return AArch64::ADDSWrx;
+ case AArch64::ANDWri: Is64Bit = false; return AArch64::ANDSWri;
+ case AArch64::ANDWrr: Is64Bit = false; return AArch64::ANDSWrr;
+ case AArch64::ANDWrs: Is64Bit = false; return AArch64::ANDSWrs;
+ case AArch64::BICWrr: Is64Bit = false; return AArch64::BICSWrr;
+ case AArch64::BICWrs: Is64Bit = false; return AArch64::BICSWrs;
+ case AArch64::SUBWri: Is64Bit = false; return AArch64::SUBSWri;
+ case AArch64::SUBWrr: Is64Bit = false; return AArch64::SUBSWrr;
+ case AArch64::SUBWrs: Is64Bit = false; return AArch64::SUBSWrs;
+ case AArch64::SUBWrx: Is64Bit = false; return AArch64::SUBSWrx;
+ // 64-bit cases:
+ case AArch64::ADDXri: Is64Bit = true; return AArch64::ADDSXri;
+ case AArch64::ADDXrr: Is64Bit = true; return AArch64::ADDSXrr;
+ case AArch64::ADDXrs: Is64Bit = true; return AArch64::ADDSXrs;
+ case AArch64::ADDXrx: Is64Bit = true; return AArch64::ADDSXrx;
+ case AArch64::ANDXri: Is64Bit = true; return AArch64::ANDSXri;
+ case AArch64::ANDXrr: Is64Bit = true; return AArch64::ANDSXrr;
+ case AArch64::ANDXrs: Is64Bit = true; return AArch64::ANDSXrs;
+ case AArch64::BICXrr: Is64Bit = true; return AArch64::BICSXrr;
+ case AArch64::BICXrs: Is64Bit = true; return AArch64::BICSXrs;
+ case AArch64::SUBXri: Is64Bit = true; return AArch64::SUBSXri;
+ case AArch64::SUBXrr: Is64Bit = true; return AArch64::SUBSXrr;
+ case AArch64::SUBXrs: Is64Bit = true; return AArch64::SUBSXrs;
+ case AArch64::SUBXrx: Is64Bit = true; return AArch64::SUBSXrx;
+ }
+ }
+
/// Return true if this is a load/store that can be potentially paired/merged.
bool isCandidateToMergeOrPair(MachineInstr &MI) const;
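
A minimal usage sketch for the new helper; the caller below is
hypothetical:

#include "AArch64InstrInfo.h"
using namespace llvm;

// Map an opcode to its flag-setting twin and learn the register width
// in one call, e.g. ADDWri -> ADDSWri (Is64Bit == false) and
// SUBXrr -> SUBSXrr (Is64Bit == true).
static unsigned flagSettingTwin(unsigned Opc, bool &Is64Bit) {
  return AArch64InstrInfo::convertToFlagSettingOpc(Opc, Is64Bit);
}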
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 9243eb91cc1ac..005f2d51e4036 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -795,6 +795,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
int LoadSize = getMemScale(*LoadI);
int StoreSize = getMemScale(*StoreI);
unsigned LdRt = getLdStRegOp(*LoadI).getReg();
+ const MachineOperand &StMO = getLdStRegOp(*StoreI);
unsigned StRt = getLdStRegOp(*StoreI).getReg();
bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
@@ -807,7 +808,13 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
// Remove the load, if the destination register of the load is the same
// register as the stored value.
if (StRt == LdRt && LoadSize == 8) {
- StoreI->clearRegisterKills(StRt, TRI);
+ for (MachineInstr &MI : make_range(StoreI->getIterator(),
+ LoadI->getIterator())) {
+ if (MI.killsRegister(StRt, TRI)) {
+ MI.clearRegisterKills(StRt, TRI);
+ break;
+ }
+ }
DEBUG(dbgs() << "Remove load instruction:\n ");
DEBUG(LoadI->print(dbgs()));
DEBUG(dbgs() << "\n");
@@ -819,7 +826,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
.addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
- .addReg(StRt)
+ .add(StMO)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
// FIXME: Currently we disable this transformation in big-endian targets as
@@ -860,14 +867,14 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
DestReg)
- .addReg(StRt)
+ .add(StMO)
.addImm(AndMaskEncoded);
} else {
BitExtMI =
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
DestReg)
- .addReg(StRt)
+ .add(StMO)
.addImm(Immr)
.addImm(Imms);
}
@@ -876,7 +883,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
// Clear kill flags between store and load.
for (MachineInstr &MI : make_range(StoreI->getIterator(),
BitExtMI->getIterator()))
- MI.clearRegisterKills(StRt, TRI);
+ if (MI.killsRegister(StRt, TRI)) {
+ MI.clearRegisterKills(StRt, TRI);
+ break;
+ }
DEBUG(dbgs() << "Promoting load by replacing :\n ");
DEBUG(StoreI->print(dbgs()));
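
For context, a hypothetical source pattern that produces the str/ldr
pair promoted here. Because the stored value is reused by the inserted
copy, its kill flag between the store and the load must be cleared,
and the hunks above narrow that clearing to the single instruction
that actually kills the register:

void storeThenReload(int *P, int V, int *Out) {
  *P = V;    // str w1, [x0]
  *Out = *P; // load promoted to a copy: orr w2, wzr, w1
}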
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
index 3b71d529db59b..ccc9d2ad1b482 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -7,37 +7,27 @@
//
//===----------------------------------------------------------------------===//
//
-// \file This file contains the AArch64 implementation of the DAG scheduling mutation
-// to pair instructions back to back.
+/// \file This file contains the AArch64 implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/Target/TargetInstrInfo.h"
-#define DEBUG_TYPE "misched"
-
-STATISTIC(NumFused, "Number of instr pairs fused");
-
using namespace llvm;
-static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
- cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-
namespace {
-/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused
-/// together. Given an anchor instr, when the other instr is unspecified, then
-/// check if the anchor instr may be part of a fused pair at all.
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. When FirstMI is unspecified, check whether SecondMI may be
+/// part of a fused pair at all.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const TargetSubtargetInfo &TSI,
const MachineInstr *FirstMI,
- const MachineInstr *SecondMI) {
- assert((FirstMI || SecondMI) && "At least one instr must be specified");
-
+ const MachineInstr &SecondMI) {
const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);
const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
@@ -45,9 +35,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
unsigned FirstOpcode =
FirstMI ? FirstMI->getOpcode()
: static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
- unsigned SecondOpcode =
- SecondMI ? SecondMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
if (ST.hasArithmeticBccFusion())
// Fuse CMN, CMP, TST followed by Bcc.
@@ -128,158 +116,49 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
if (ST.hasFuseAES())
// Fuse AES crypto operations.
- switch(FirstOpcode) {
+ switch(SecondOpcode) {
// AES encode.
- case AArch64::AESErr:
- return SecondOpcode == AArch64::AESMCrr ||
- SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ case AArch64::AESMCrr :
+ return FirstOpcode == AArch64::AESErr ||
+ FirstOpcode == AArch64::INSTRUCTION_LIST_END;
// AES decode.
- case AArch64::AESDrr:
- return SecondOpcode == AArch64::AESIMCrr ||
- SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ case AArch64::AESIMCrr:
+ return FirstOpcode == AArch64::AESDrr ||
+ FirstOpcode == AArch64::INSTRUCTION_LIST_END;
}
if (ST.hasFuseLiterals())
// Fuse literal generation operations.
- switch (FirstOpcode) {
+ switch (SecondOpcode) {
// PC relative address.
- case AArch64::ADRP:
- return SecondOpcode == AArch64::ADDXri ||
- SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ case AArch64::ADDXri:
+ return FirstOpcode == AArch64::ADRP ||
+ FirstOpcode == AArch64::INSTRUCTION_LIST_END;
// 32 bit immediate.
- case AArch64::MOVZWi:
- return (SecondOpcode == AArch64::MOVKWi &&
- SecondMI->getOperand(3).getImm() == 16) ||
- SecondOpcode == AArch64::INSTRUCTION_LIST_END;
- // Lower half of 64 bit immediate.
- case AArch64::MOVZXi:
- return (SecondOpcode == AArch64::MOVKXi &&
- SecondMI->getOperand(3).getImm() == 16) ||
- SecondOpcode == AArch64::INSTRUCTION_LIST_END;
- // Upper half of 64 bit immediate.
+ case AArch64::MOVKWi:
+ return (FirstOpcode == AArch64::MOVZWi &&
+ SecondMI.getOperand(3).getImm() == 16) ||
+ FirstOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Lower and upper half of a 64-bit immediate.
case AArch64::MOVKXi:
- return FirstMI->getOperand(3).getImm() == 32 &&
- ((SecondOpcode == AArch64::MOVKXi &&
- SecondMI->getOperand(3).getImm() == 48) ||
- SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+ return FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
+ (FirstOpcode == AArch64::MOVZXi &&
+ SecondMI.getOperand(3).getImm() == 16) ||
+ (FirstOpcode == AArch64::MOVKXi &&
+ FirstMI->getOperand(3).getImm() == 32 &&
+ SecondMI.getOperand(3).getImm() == 48);
}
return false;
}
-/// \brief Implement the fusion of instr pairs in the scheduling DAG,
-/// anchored at the instr in AnchorSU..
-static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
- const MachineInstr *AnchorMI = AnchorSU.getInstr();
- if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient())
- return false;
-
- // If the anchor instr is the ExitSU, then consider its predecessors;
- // otherwise, its successors.
- bool Preds = (&AnchorSU == &DAG->ExitSU);
- SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs;
-
- const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI;
- const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr;
-
- // Check if the anchor instr may be fused.
- if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
- FirstMI, SecondMI))
- return false;
-
- // Explorer for fusion candidates among the dependencies of the anchor instr.
- for (SDep &Dep : AnchorDeps) {
- // Ignore dependencies that don't enforce ordering.
- if (Dep.isWeak())
- continue;
-
- SUnit &DepSU = *Dep.getSUnit();
- // Ignore the ExitSU if the dependents are successors.
- if (!Preds && &DepSU == &DAG->ExitSU)
- continue;
-
- const MachineInstr *DepMI = DepSU.getInstr();
- if (!DepMI || DepMI->isPseudo() || DepMI->isTransient())
- continue;
-
- FirstMI = Preds ? DepMI : AnchorMI;
- SecondMI = Preds ? AnchorMI : DepMI;
- if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
- FirstMI, SecondMI))
- continue;
-
- // Create a single weak edge between the adjacent instrs. The only effect is
- // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
- SUnit &FirstSU = Preds ? DepSU : AnchorSU;
- SUnit &SecondSU = Preds ? AnchorSU : DepSU;
- DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
-
- // Adjust the latency between the anchor instr and its
- // predecessors/successors.
- for (SDep &IDep : AnchorDeps)
- if (IDep.getSUnit() == &DepSU)
- IDep.setLatency(0);
-
- // Adjust the latency between the dependent instr and its
- // successors/predecessors.
- for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds)
- if (IDep.getSUnit() == &AnchorSU)
- IDep.setLatency(0);
-
- DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
- FirstSU.print(dbgs(), DAG); dbgs() << " - ";
- SecondSU.print(dbgs(), DAG); dbgs() << " / ";
- dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
- DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
-
- if (&SecondSU != &DAG->ExitSU)
- // Make instructions dependent on FirstSU also dependent on SecondSU to
- // prevent them from being scheduled between FirstSU and and SecondSU.
- for (SUnit::const_succ_iterator
- SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end();
- SI != SE; ++SI) {
- if (!SI->getSUnit() || SI->getSUnit() == &SecondSU)
- continue;
- DEBUG(dbgs() << " Copy Succ ";
- SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';);
- DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial));
- }
-
- ++NumFused;
- return true;
- }
-
- return false;
-}
-
-/// \brief Post-process the DAG to create cluster edges between instrs that may
-/// be fused by the processor into a single operation.
-class AArch64MacroFusion : public ScheduleDAGMutation {
-public:
- AArch64MacroFusion() {}
-
- void apply(ScheduleDAGInstrs *DAGInstrs) override;
-};
-
-void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
- // For each of the SUnits in the scheduling block, try to fuse the instr in it
- // with one in its successors.
- for (SUnit &ISU : DAG->SUnits)
- scheduleAdjacentImpl(DAG, ISU);
-
- // Try to fuse the instr in the ExitSU with one in its predecessors.
- scheduleAdjacentImpl(DAG, DAG->ExitSU);
-}
-
} // end namespace
namespace llvm {
std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () {
- return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
}
} // end namespace llvm
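
A hypothetical example of the literal fusion this callback drives,
keyed on SecondMI as described above (the assembly comments are
illustrative):

#include <cstdint>

// Materializing a wide constant emits a MOVZ/MOVK chain; with
// hasFuseLiterals(), each MOVK (the SecondMI) is checked against its
// producer (the FirstMI) and qualifying pairs are clustered:
uint64_t wideConst() {
  return 0x1234567890abcdefULL;
  // movz x0, #0xcdef
  // movk x0, #0x90ab, lsl #16  ; fused with the movz (imm == 16)
  // movk x0, #0x5678, lsl #32
  // movk x0, #0x1234, lsl #48  ; fused with the lsl #32 movk
}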
diff --git a/lib/Target/AArch64/AArch64MacroFusion.h b/lib/Target/AArch64/AArch64MacroFusion.h
index e5efedd9fbfd9..32d90d4c40d6f 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.h
+++ b/lib/Target/AArch64/AArch64MacroFusion.h
@@ -2,23 +2,18 @@
//
// The LLVM Compiler Infrastructure
//
-// \fileThis file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
-// This file contains the AArch64 definition of the DAG scheduling mutation
-// to pair instructions back to back.
+/// \file This file contains the AArch64 definition of the DAG scheduling
+/// mutation to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
-#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
-//===----------------------------------------------------------------------===//
-// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops.
-//===----------------------------------------------------------------------===//
-
namespace llvm {
/// Note that you have to add:
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 9b3899e0681cf..69124dbd0f838 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -469,10 +469,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
/*NumOperands*/ 2);
}
- case TargetOpcode::G_SEQUENCE:
- // FIXME: support this, but the generic code is really not going to do
- // anything sane.
- return getInvalidInstructionMapping();
default:
break;
}
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 303398ea0b7f3..5d1608ef04afa 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// The Cortex-A57 is a traditional superscaler microprocessor with a
+// The Cortex-A57 is a traditional superscalar microprocessor with a
// conservative 3-wide in-order stage for decode and dispatch. Combined with the
// much wider out-of-order issue stage, this produced a need to carefully
// schedule micro-ops so that all three decoded each cycle are successfully
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index 3d737402022d8..0aeb1f3e30584 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -32,8 +32,8 @@
//===----------------------------------------------------------------------===//
// Define 0 micro-op types
-def FalkorWr_none_1cyc : SchedWriteRes<[]> {
- let Latency = 1;
+def FalkorWr_LdStInc_none_3cyc : SchedWriteRes<[]> {
+ let Latency = 3;
let NumMicroOps = 0;
}
def FalkorWr_none_3cyc : SchedWriteRes<[]> {
@@ -505,7 +505,8 @@ def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
let NumMicroOps = 12;
}
-// Forwarding logic is modeled for multiply add/accumulate.
+// Forwarding logic is modeled for multiply add/accumulate and
+// load/store base register increment.
// -----------------------------------------------------------------------------
def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
@@ -513,9 +514,13 @@ def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr
def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
+def FalkorReadIncLd : SchedReadAdvance<2, [FalkorWr_LdStInc_none_3cyc]>;
+def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_LdStInc_none_3cyc]>;
+
// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
// -----------------------------------------------------------------------------
-def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>;
+def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0}]>;
def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
MI->getOperand(1).getReg() == AArch64::XZR}]>;
@@ -770,84 +775,113 @@ def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
// SIMD Load Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instrs LD2i64)>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instrs LD2i64_POST)>;
-
-def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD3i64_POST)>;
-def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD4i64_POST)>;
-
-def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instrs LD3Threev2d_POST)>;
-def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instrs LD4Fourv2d_POST)>;
-def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "^LD3Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1none_4cyc],
- (instregex "^LD3Threev(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2none_4cyc],
- (instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD3Threev(16b|8h|4s)$")>;
-
-def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
-
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc],
- (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
-
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc],
- (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
+ (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
+ (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
+ (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
+
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------
@@ -929,87 +963,105 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
// SIMD Store Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STR(Q|D|S|H|B)ui$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(Q|D|S|H|B)ui$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
-def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>;
-def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^STPQi$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2VSD_2ST_0cyc],
+def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(D|S|H|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STPQi$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
(instregex "^STPQ(post|pre)$")>;
-def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STP(D|S)(i)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STP(D|S)(i)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
(instregex "^STP(D|S)(post|pre)$")>;
-def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>;
-def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STUR(Q|D|S|B|H)i$")>;
-def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instrs STNPDi, STNPSi)>;
-def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instrs STNPQi)>;
-
-def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
+def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt],
+ (instregex "^STRQro(W|X)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STUR(Q|D|S|B|H)i$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instrs STNPDi, STNPSi)>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instrs STNPQi)>;
+
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
-def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
- (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
-
-def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
-def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST3(i8|i16|i32|i64)$")>;
-def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST4(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4(i8|i16|i32|i64)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST3(i8|i16|i32|i64)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST4(i8|i16|i32|i64)_POST$")>;
-def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc],
- (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3Three(v8b|v4h|v2s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc],
- (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>;
-def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instrs ST3Threev2d)>;
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instrs ST3Threev2d)>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
(instrs ST3Threev2d_POST)>;
-def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc],
- (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4Four(v8b|v4h|v2s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc],
- (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>;
-def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
-def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instrs ST4Fourv2d)>;
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instrs ST4Fourv2d)>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
(instrs ST4Fourv2d_POST)>;
-def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc],
+def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST3Three(v16b|v8h|v4s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
-def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc],
+def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST4Four(v16b|v8h|v4s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
-def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc],
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
// Branch Instructions
@@ -1033,22 +1085,25 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
// FP Load Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
(instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(Q|D|S|H|B)i$")>;
-def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
-def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
+ (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instrs LDNPQi)>;
-def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instrs LDPQi)>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instregex "LDNP(D|S)i$")>;
-def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instregex "LDP(D|S)i$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instregex "LDP(D|S)(pre|post)$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instregex "^LDPQ(pre|post)$")>;
// FP Data Processing Instructions
@@ -1106,31 +1161,41 @@ def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
-def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instregex "^LDNP(W|X)i$")>;
-def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instregex "^LDP(W|X)i$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
(instregex "^LDP(W|X)(post|pre)$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(BB|HH|W|X)ui$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
(instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(BB|HH|W|X)i$")>;
+def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
+ (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR(W|X)l$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDUR(BB|HH|W|X)i$")>;
def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
-def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
(instrs LDPSWi)>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
(instregex "^LDPSW(post|pre)$")>;
-def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc],
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd],
(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
-def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
-def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
-def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
-def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd],
+ (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instrs LDRSWl)>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
// Miscellaneous Data-Processing Instructions
// -----------------------------------------------------------------------------
@@ -1178,32 +1243,46 @@ def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HL
def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>;
def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
-def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>;
def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>;
def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
-def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instrs STNPWi, STNPXi)>;
def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
-def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>;
-def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>;
-def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>;
-
-def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>;
-def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STXP(W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STXR(B|H|W|X)$")>;
+
+def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STLXP(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STLXR(B|H|W|X)$")>;
// Store Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STP(W|X)i$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STP(W|X)i$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
(instregex "^STP(W|X)(post|pre)$")>;
-def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STR(BB|HH|W|X)ui$")>;
-def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
(instregex "^STR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[FalkorWr_STRro], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
-def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STTR(B|H|W|X)i$")>;
-def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STUR(BB|HH|W|X)i$")>;
+def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STUR(BB|HH|W|X)i$")>;
diff --git a/lib/Target/AArch64/AArch64SchedKryoDetails.td b/lib/Target/AArch64/AArch64SchedKryoDetails.td
index 02cccccd3078c..cf4cdabb8cbfc 100644
--- a/lib/Target/AArch64/AArch64SchedKryoDetails.td
+++ b/lib/Target/AArch64/AArch64SchedKryoDetails.td
@@ -1374,7 +1374,9 @@ def KryoWrite_3cyc_LS_LS_400ln :
let Latency = 3; let NumMicroOps = 2;
}
def : InstRW<[KryoWrite_3cyc_LS_LS_400ln],
- (instregex "(LDAX?R(B|H|W|X)|LDAXP(W|X))")>;
+ (instregex "LDAX?R(B|H|W|X)")>;
+def : InstRW<[KryoWrite_3cyc_LS_LS_400ln, WriteLDHi],
+ (instregex "LDAXP(W|X)")>;
def KryoWrite_3cyc_LS_LS_401ln :
SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
let Latency = 3; let NumMicroOps = 2;
@@ -1565,7 +1567,7 @@ def KryoWrite_3cyc_LS_258ln :
SchedWriteRes<[KryoUnitLS]> {
let Latency = 3; let NumMicroOps = 1;
}
-def : InstRW<[KryoWrite_3cyc_LS_258ln],
+def : InstRW<[KryoWrite_3cyc_LS_258ln, WriteLDHi],
(instregex "LDXP(W|X)")>;
def KryoWrite_3cyc_LS_258_1ln :
SchedWriteRes<[KryoUnitLS]> {
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d4a8cecdb29f1..6660f0babb8a6 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -47,6 +47,11 @@ static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
cl::desc("Enable the CCMP formation pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableCondBrTuning("aarch64-enable-cond-br-tune",
+ cl::desc("Enable the conditional branch tuning pass"),
+ cl::init(true), cl::Hidden);
+
static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
@@ -429,6 +434,8 @@ bool AArch64PassConfig::addILPOpts() {
addPass(createAArch64ConditionalCompares());
if (EnableMCR)
addPass(&MachineCombinerID);
+ if (EnableCondBrTuning)
+ addPass(createAArch64CondBrTuning());
if (EnableEarlyIfConversion)
addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index f0f50f29be0f3..02b12b5e90ca2 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -43,6 +43,7 @@ add_llvm_target(AArch64CodeGen
AArch64AsmPrinter.cpp
AArch64CleanupLocalDynamicTLSPass.cpp
AArch64CollectLOH.cpp
+ AArch64CondBrTuning.cpp
AArch64ConditionalCompares.cpp
AArch64DeadRegisterDefinitionsPass.cpp
AArch64ExpandPseudoInsts.cpp
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 43a6fa9ce0896..3d075018904c0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -43,26 +43,25 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // AArch64FixupKinds.h.
- //
- // Name Offset (bits) Size (bits) Flags
- { "fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal },
- { "fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal },
- { "fixup_aarch64_add_imm12", 10, 12, 0 },
- { "fixup_aarch64_ldst_imm12_scale1", 10, 12, 0 },
- { "fixup_aarch64_ldst_imm12_scale2", 10, 12, 0 },
- { "fixup_aarch64_ldst_imm12_scale4", 10, 12, 0 },
- { "fixup_aarch64_ldst_imm12_scale8", 10, 12, 0 },
- { "fixup_aarch64_ldst_imm12_scale16", 10, 12, 0 },
- { "fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal },
- { "fixup_aarch64_movw", 5, 16, 0 },
- { "fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal },
- { "fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal },
- { "fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal },
- { "fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal },
- { "fixup_aarch64_tlsdesc_call", 0, 0, 0 }
- };
+ // This table *must* be in the order that the fixup_* kinds are defined
+ // in AArch64FixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal},
+ {"fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal},
+ {"fixup_aarch64_add_imm12", 10, 12, 0},
+ {"fixup_aarch64_ldst_imm12_scale1", 10, 12, 0},
+ {"fixup_aarch64_ldst_imm12_scale2", 10, 12, 0},
+ {"fixup_aarch64_ldst_imm12_scale4", 10, 12, 0},
+ {"fixup_aarch64_ldst_imm12_scale8", 10, 12, 0},
+ {"fixup_aarch64_ldst_imm12_scale16", 10, 12, 0},
+ {"fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal},
+ {"fixup_aarch64_movw", 5, 16, 0},
+ {"fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal},
+ {"fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal},
+ {"fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal},
+ {"fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal},
+ {"fixup_aarch64_tlsdesc_call", 0, 0, 0}};
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -72,8 +71,9 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -261,13 +261,15 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con
}
}
-void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
if (!Value)
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+ MCContext &Ctx = Asm.getContext();
// Apply any target-specific value adjustments.
Value = adjustFixupValue(Fixup, Value, Ctx);
@@ -275,7 +277,7 @@ void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
Value <<= Info.TargetOffset;
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FulleSizeInBytes = getFixupKindContainereSizeInBytes(Fixup.getKind());
@@ -289,7 +291,7 @@ void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
} else {
// Handle as big-endian
- assert((Offset + FulleSizeInBytes) <= DataSize && "Invalid fixup size!");
+ assert((Offset + FulleSizeInBytes) <= Data.size() && "Invalid fixup size!");
assert(NumBytes <= FulleSizeInBytes && "Invalid fixup size!");
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = FulleSizeInBytes - 1 - i;
@@ -539,16 +541,14 @@ public:
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32);
}
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
+ void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, bool &IsResolved) override;
};
-void ELFAArch64AsmBackend::processFixupValue(
- const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup,
- const MCFragment *DF, const MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
+void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ bool &IsResolved) {
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
// ~0xfff. This means that the required offset to reach a symbol can vary by
// up to one step depending on where the ADRP is in memory. For example:
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 6ab2b9ef04598..7494e5decd6f6 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -238,6 +238,36 @@ def FeatureSDWA : SubtargetFeature<"sdwa",
"Support SDWA (Sub-DWORD Addressing) extension"
>;
+def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod",
+ "HasSDWAOmod",
+ "true",
+ "Support OMod with SDWA (Sub-DWORD Addressing) extension"
+>;
+
+def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar",
+ "HasSDWAScalar",
+ "true",
+ "Support scalar register with SDWA (Sub-DWORD Addressing) extension"
+>;
+
+def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst",
+ "HasSDWASdst",
+ "true",
+ "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension"
+>;
+
+def FeatureSDWAMac : SubtargetFeature<"sdwa-mac",
+ "HasSDWAMac",
+ "true",
+ "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
+>;
+
+def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc",
+ "HasSDWAClampVOPC",
+ "true",
+ "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
+>;
+
def FeatureDPP : SubtargetFeature<"dpp",
"HasDPP",
"true",
@@ -421,8 +451,8 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
- FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
- FeatureDPP
+ FeatureScalarStores, FeatureInv2PiInlineImm,
+ FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP
]
>;
@@ -432,7 +462,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
- FeatureFastFMAF32, FeatureSDWA, FeatureDPP,
+ FeatureFastFMAF32, FeatureDPP,
+ FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts
]
>;
@@ -449,14 +480,14 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
[FeatureSouthernIslands,
- FeatureFastFMAF32,
+ FeatureFastFMAF32,
HalfRate64Ops,
FeatureLDSBankCount32]>;
def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
[FeatureSouthernIslands,
FeatureLDSBankCount32]>;
-
+
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
[FeatureSeaIslands,
FeatureLDSBankCount32]>;
@@ -644,7 +675,11 @@ def isCIVI : Predicate <
"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
AssemblerPredicate<"FeatureCIInsts">;
-def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
+def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
+ AssemblerPredicate<"FeatureFlatAddressSpace">;
+
+def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
+ AssemblerPredicate<"FeatureFlatGlobalInsts">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
AssemblerPredicate<"Feature16BitInsts">;
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5586b513b5fca..96f819fd0e684 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3527,18 +3527,25 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
//===----------------------------------------------------------------------===//
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const {
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT,
+ const SDLoc &SL,
+ bool RawReg) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned VirtualRegister;
+ unsigned VReg;
+
if (!MRI.isLiveIn(Reg)) {
- VirtualRegister = MRI.createVirtualRegister(RC);
- MRI.addLiveIn(Reg, VirtualRegister);
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(Reg, VReg);
} else {
- VirtualRegister = MRI.getLiveInVirtReg(Reg);
+ VReg = MRI.getLiveInVirtReg(Reg);
}
- return DAG.getRegister(VirtualRegister, VT);
+
+ if (RawReg)
+ return DAG.getRegister(VReg, VT);
+
+ return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
}
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
@@ -3657,6 +3664,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
+ NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3)
+ NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(ATOMIC_INC)
NODE_NAME_CASE(ATOMIC_DEC)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 0d066cdbdff4d..a45234e2b39f2 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -216,10 +216,25 @@ public:
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
- /// \returns a RegisterSDNode representing Reg.
- virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const;
+ /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
+ /// a copy from the register.
+ SDValue CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT,
+ const SDLoc &SL,
+ bool RawReg = false) const;
+ SDValue CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
+ }
+
+  // Returns the raw live-in register rather than a copy from it.
+ SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true);
+ }
enum ImplicitParameter {
FIRST_IMPLICIT,
@@ -388,6 +403,8 @@ enum NodeType : unsigned {
STORE_MSKOR,
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
+ TBUFFER_STORE_FORMAT_X3,
+ TBUFFER_LOAD_FORMAT,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index a01f5d37c7c16..69dc529861729 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -66,7 +66,9 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
- VI = 1
+ VI = 1,
+ SDWA = 2,
+ SDWA9 = 3
};
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
@@ -101,7 +103,12 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));
+ SIEncodingFamily Gen = subtargetEncodingFamily(ST);
+ if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
+ Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
+ : SIEncodingFamily::SDWA;
+
+ int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index e286558ce60d7..bcf89bb78ad66 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -70,6 +70,10 @@ def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
>;
+def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -179,6 +183,12 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
+// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own
+// nodes in TargetSelectionDAG.td.
+def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>;
+
+def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>;
+
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
]>;
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 790a69b843979..cc56216c355bf 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -29,12 +29,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
using namespace TargetOpcode;
  const LLT S1 = LLT::scalar(1);
+ const LLT V2S16 = LLT::vector(2, 16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT P1 = LLT::pointer(1, 64);
const LLT P2 = LLT::pointer(2, 64);
setAction({G_ADD, S32}, Legal);
+ setAction({G_AND, S32}, Legal);
+
+ setAction({G_BITCAST, V2S16}, Legal);
+ setAction({G_BITCAST, 1, S32}, Legal);
+
+ setAction({G_BITCAST, S32}, Legal);
+ setAction({G_BITCAST, 1, V2S16}, Legal);
// FIXME: i1 operands to intrinsics should always be legal, but other i1
// values may not be legal. We need to figure out how to distinguish
@@ -61,6 +69,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
setAction({G_SELECT, S32}, Legal);
setAction({G_SELECT, 1, S1}, Legal);
+ setAction({G_SHL, S32}, Legal);
+
setAction({G_STORE, S32}, Legal);
setAction({G_STORE, 1, P1}, Legal);
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 8d157e2f98f24..ab5abf2039a5b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -124,6 +124,11 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasScalarStores(false),
HasInv2PiInlineImm(false),
HasSDWA(false),
+ HasSDWAOmod(false),
+ HasSDWAScalar(false),
+ HasSDWASdst(false),
+ HasSDWAMac(false),
+ HasSDWAClampVOPC(false),
HasDPP(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 5f4f20316a6ba..2b16289c723ef 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -149,6 +149,11 @@ protected:
bool HasScalarStores;
bool HasInv2PiInlineImm;
bool HasSDWA;
+ bool HasSDWAOmod;
+ bool HasSDWAScalar;
+ bool HasSDWASdst;
+ bool HasSDWAMac;
+ bool HasSDWAClampVOPC;
bool HasDPP;
bool FlatAddressSpace;
bool FlatInstOffsets;
@@ -431,6 +436,26 @@ public:
return HasSDWA;
}
+ bool hasSDWAOmod() const {
+ return HasSDWAOmod;
+ }
+
+ bool hasSDWAScalar() const {
+ return HasSDWAScalar;
+ }
+
+ bool hasSDWASdst() const {
+ return HasSDWASdst;
+ }
+
+ bool hasSDWAMac() const {
+ return HasSDWAMac;
+ }
+
+ bool hasSDWAClampVOPC() const {
+ return HasSDWAClampVOPC;
+ }
+
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b644eba536fa4..04fe9f689806c 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -342,6 +342,14 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
PM.add(createAMDGPUExternalAAWrapperPass());
}
});
+
+ Builder.addExtension(
+ PassManagerBuilder::EP_CGSCCOptimizerLate,
+ [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ // Add infer address spaces pass to the opt pipeline after inlining
+ // but before SROA to increase SROA opportunities.
+ PM.add(createInferAddressSpacesPass());
+ });
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0d6689bd04c4e..88245b01683a5 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -184,9 +184,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
}
}
-unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
- if (Vec)
- return 0;
+unsigned AMDGPUTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+ // The concept of vector registers doesn't really exist. Some packed vector
+ // operations operate on the normal 32-bit registers.
// Number of VGPRs on SI.
if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
@@ -195,8 +195,18 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
+unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) const {
+ // This is really the number of registers to fill when vectorizing /
+ // interleaving loops, so we lie to avoid trying to use all registers.
+ return getHardwareNumberOfRegisters(Vec) >> 3;
+}
+
unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const {
- return Vector ? 0 : 32;
+ return 32;
+}
+
+unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const {
+ return 32;
}
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
@@ -247,11 +257,11 @@ bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Disable unrolling if the loop is not vectorized.
+ // TODO: Enable this again.
if (VF == 1)
return 1;
- // Semi-arbitrary large amount.
- return 64;
+ return 8;
}
int AMDGPUTTIImpl::getArithmeticInstrCost(
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index a60b1bb1b59c7..485e20411ab49 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -75,8 +75,10 @@ public:
return TTI::PSK_FastHardware;
}
- unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getHardwareNumberOfRegisters(bool Vector) const;
+ unsigned getNumberOfRegisters(bool Vector) const;
+  unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 392e9d89bd9ba..7b8756050b752 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -152,6 +152,8 @@ public:
ImmTyExpTgt,
ImmTyExpCompr,
ImmTyExpVM,
+ ImmTyDFMT,
+ ImmTyNFMT,
ImmTyHwreg,
ImmTyOff,
ImmTySendMsg,
@@ -260,6 +262,8 @@ public:
return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
}
+ bool isSDWARegKind() const;
+
bool isImmTy(ImmTy ImmT) const {
return isImm() && Imm.Type == ImmT;
}
@@ -292,6 +296,8 @@ public:
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
+ bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
+ bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
@@ -636,6 +642,8 @@ public:
case ImmTyGLC: OS << "GLC"; break;
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
+ case ImmTyDFMT: OS << "DFMT"; break;
+ case ImmTyNFMT: OS << "NFMT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
@@ -993,7 +1001,9 @@ private:
void errorExpTgt();
OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
- bool validateOperandLimitations(const MCInst &Inst);
+ bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
+ bool validateConstantBusLimitations(const MCInst &Inst);
+ bool validateEarlyClobberLimitations(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -1029,6 +1039,8 @@ public:
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
+ void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
+
AMDGPUOperand::Ptr defaultGLC() const;
AMDGPUOperand::Ptr defaultSLC() const;
AMDGPUOperand::Ptr defaultTFE() const;
@@ -1042,6 +1054,7 @@ public:
AMDGPUOperand::Ptr defaultSMRDOffset20() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
AMDGPUOperand::Ptr defaultOffsetU12() const;
+ AMDGPUOperand::Ptr defaultOffsetS13() const;
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
@@ -1243,6 +1256,15 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
+bool AMDGPUOperand::isSDWARegKind() const {
+ if (AsmParser->isVI())
+ return isVReg();
+ else if (AsmParser->isGFX9())
+ return isRegKind();
+ else
+ return false;
+}
+
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
@@ -2083,7 +2105,7 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}
-bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
+bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
unsigned ConstantBusUseCount = 0;
@@ -2137,6 +2159,60 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
return ConstantBusUseCount <= 1;
}
+bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
+
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+
+ const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
+ if (DstIdx == -1 ||
+ Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
+ return true;
+ }
+
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ assert(DstIdx != -1);
+ const MCOperand &Dst = Inst.getOperand(DstIdx);
+ assert(Dst.isReg());
+ const unsigned DstReg = mc2PseudoReg(Dst.getReg());
+
+ const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
+ for (int SrcIdx : SrcIndices) {
+ if (SrcIdx == -1) break;
+ const MCOperand &Src = Inst.getOperand(SrcIdx);
+ if (Src.isReg()) {
+ const unsigned SrcReg = mc2PseudoReg(Src.getReg());
+ if (isRegIntersect(DstReg, SrcReg, TRI)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
+ const SMLoc &IDLoc) {
+ if (!validateConstantBusLimitations(Inst)) {
+ Error(IDLoc,
+ "invalid operand (violates constant bus restrictions)");
+ return false;
+ }
+ if (!validateEarlyClobberLimitations(Inst)) {
+ Error(IDLoc,
+ "destination must be different than all sources");
+ return false;
+ }
+
+ return true;
+}
+
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -2169,9 +2245,8 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (Result) {
default: break;
case Match_Success:
- if (!validateOperandLimitations(Inst)) {
- return Error(IDLoc,
- "invalid operand (violates constant bus restrictions)");
+ if (!validateInstruction(Inst, IDLoc)) {
+ return true;
}
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst, getSTI());
@@ -2554,11 +2629,21 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
return MatchOperand_ParseFail;
Parser.Lex();
+
+ bool IsMinus = false;
+ if (getLexer().getKind() == AsmToken::Minus) {
+ Parser.Lex();
+ IsMinus = true;
+ }
+
if (getLexer().isNot(AsmToken::Integer))
return MatchOperand_ParseFail;
if (getParser().parseAbsoluteExpression(Int))
return MatchOperand_ParseFail;
+
+ if (IsMinus)
+ Int = -Int;
break;
}
}
@@ -3743,6 +3828,44 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
+void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyOffset);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+}
+
//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//
@@ -3870,6 +3993,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//
@@ -3919,6 +4046,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
{"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
+ {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
+ {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -4475,12 +4604,11 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
- // V_NOP_sdwa_vi has no optional sdwa arguments
+    // V_NOP_sdwa_vi/gfx9 has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- if (isGFX9() &&
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
@@ -4490,8 +4618,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
case SIInstrFlags::VOP2:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- if (isGFX9() &&
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
@@ -4501,9 +4628,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
break;
case SIInstrFlags::VOPC:
- if (isVI()) {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- }
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index 2aca65ac84303..2e96c14eaa320 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -57,6 +57,11 @@ class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
string OpName = NAME # suffix;
}
+class MTBUFAddr64Table <bit is_addr64, string suffix = ""> {
+ bit IsAddr64 = is_addr64;
+ string OpName = NAME # suffix;
+}
+
//===----------------------------------------------------------------------===//
// MTBUF classes
//===----------------------------------------------------------------------===//
@@ -78,14 +83,31 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
let EXP_CNT = 1;
let MTBUF = 1;
let Uses = [EXEC];
-
let hasSideEffects = 0;
let SchedRW = [WriteVMEM];
+
+ let AsmMatchConverter = "cvtMtbuf";
+
+ bits<1> offen = 0;
+ bits<1> idxen = 0;
+ bits<1> addr64 = 0;
+ bits<1> has_vdata = 1;
+ bits<1> has_vaddr = 1;
+ bits<1> has_glc = 1;
+ bits<1> glc_value = 0; // the value for glc if no such operand
+ bits<4> dfmt_value = 1; // the value for dfmt if no such operand
+ bits<3> nfmt_value = 0; // the value for nfmt if no such operand
+ bits<1> has_srsrc = 1;
+ bits<1> has_soffset = 1;
+ bits<1> has_offset = 1;
+ bits<1> has_slc = 1;
+ bits<1> has_tfe = 1;
+ bits<1> has_dfmt = 1;
+ bits<1> has_nfmt = 1;
}
class MTBUF_Real <MTBUF_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
- Enc64 {
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -97,57 +119,168 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
- bits<8> vdata;
bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> addr64;
- bits<4> dfmt;
- bits<3> nfmt;
- bits<8> vaddr;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
+ bits<1> glc;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+}
+
+class getMTBUFInsDA<list<RegisterClass> vdataList,
+ list<RegisterClass> vaddrList=[]> {
+ RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
+ RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
+ dag InsNoData = !if(!empty(vaddrList),
+ (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
+ offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe),
+ (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
+ offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe)
+ );
+ dag InsData = !if(!empty(vaddrList),
+ (ins vdataClass:$vdata, SReg_128:$srsrc,
+ SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
+ SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ slc:$slc, tfe:$tfe)
+ );
+ dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
-class MTBUF_Load_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
- opName, (outs regClass:$dst),
- (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
- i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
- " $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
- " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
+class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
+ dag ret =
+ !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA<vdataList>.ret,
+ !if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
+ (ins))))));
+}
+
+class getMTBUFAsmOps<int addrKind> {
+ string Pfx =
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.OffEn),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
+ !if(!eq(addrKind, BUFAddrKind.IdxEn),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
+ !if(!eq(addrKind, BUFAddrKind.BothEn),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
+ !if(!eq(addrKind, BUFAddrKind.Addr64),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
+ "")))));
+ string ret = Pfx # "$offset";
+}
+
+class MTBUF_SetupAddr<int addrKind> {
+  bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1,
+                      !if(!eq(addrKind, BUFAddrKind.BothEn), 1, 0));
+
+  bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1,
+                      !if(!eq(addrKind, BUFAddrKind.BothEn), 1, 0));
+
+ bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);
+
+ bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
+}
+
+class MTBUF_Load_Pseudo <string opName,
+ int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind>
+ : MTBUF_Pseudo<opName,
+ (outs vdataClass:$vdata),
+ getMTBUFIns<addrKindCopy>.ret,
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ pattern>,
+ MTBUF_SetupAddr<addrKindCopy> {
+ let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let mayLoad = 1;
let mayStore = 0;
}
-class MTBUF_Store_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
- opName, (outs),
- (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
- i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
- SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
- " $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
- " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
+multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> {
+
+ def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(set load_vt:$vdata,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
+ i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MTBUFAddr64Table<0>;
+
+ def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(set load_vt:$vdata,
+ (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
+ i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MTBUFAddr64Table<1>;
+
+ def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+
+ let DisableWQM = 1 in {
+ def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
+ def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
+}
+
+class MTBUF_Store_Pseudo <string opName,
+ int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind,
+ RegisterClass vdataClassCopy = vdataClass>
+ : MTBUF_Pseudo<opName,
+ (outs),
+ getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ pattern>,
+ MTBUF_SetupAddr<addrKindCopy> {
+ let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let mayLoad = 0;
let mayStore = 1;
}
+multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
+ ValueType store_vt = i32,
+ SDPatternOperator st = null_frag> {
+
+ def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i1:$slc, i1:$tfe))]>,
+ MTBUFAddr64Table<0>;
+
+ def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i1:$slc, i1:$tfe))]>,
+ MTBUFAddr64Table<1>;
+
+ def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+
+ let DisableWQM = 1 in {
+ def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
+ def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// MUBUF classes
//===----------------------------------------------------------------------===//
@@ -676,14 +809,14 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
// MTBUF Instructions
//===----------------------------------------------------------------------===//
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>;
-def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>;
-def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo <"tbuffer_store_format_x", VGPR_32>;
-def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>;
-def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>;
-def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>;
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_128>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
} // End let SubtargetPredicate = isGCN
@@ -1093,22 +1226,98 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OF
// MTBUF Patterns
//===----------------------------------------------------------------------===//
-// TBUFFER_STORE_FORMAT_*, addr64=0
-class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF_Pseudo opcode> : Pat<
- (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
- i32:$soffset, imm:$inst_offset, imm:$dfmt,
- imm:$nfmt, imm:$offen, imm:$idxen,
- imm:$glc, imm:$slc, imm:$tfe),
- (opcode
- $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
- (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
- (as_i1imm $slc), (as_i1imm $tfe), $soffset)
->;
+//===----------------------------------------------------------------------===//
+// tbuffer_load/store_format patterns
+//===----------------------------------------------------------------------===//
+
+multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ def : Pat<
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
-def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
-def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
-def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
-def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
+ def : Pat<
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+}
+
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32, "TBUFFER_LOAD_FORMAT_X">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32, "TBUFFER_LOAD_FORMAT_X">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
+
+multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i8imm $dfmt),
+ (as_i8imm $nfmt), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i8imm $dfmt),
+ (as_i8imm $nfmt), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i8imm $dfmt),
+ (as_i8imm $nfmt), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
+ imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
+ $vdata,
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+}
+
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32, "TBUFFER_STORE_FORMAT_X">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4i32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32, "TBUFFER_STORE_FORMAT_X">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4f32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
} // End let Predicates = [isGCN]
@@ -1224,21 +1433,44 @@ def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
MTBUF_Real<ps>,
+ Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
let AssemblerPredicate=isSICI;
let DecoderNamespace="SICI";
- bits<1> addr64;
- let Inst{15} = addr64;
+ let Inst{11-0} = !if(ps.has_offset, offset, ?);
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{15} = ps.addr64;
let Inst{18-16} = op;
+ let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
+ let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>;
-def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>;
-def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>;
-def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>;
-def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>;
+multiclass MTBUF_Real_AllAddr_si<bits<3> op> {
+ def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _OFFEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>;
+//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
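
For reference, the bit layout that MTBUF_Real_si assembles can be written as a plain packing function; field positions follow the let Inst{...} assignments above. A simplified sketch that assumes every optional field is present (the real class leaves absent fields undefined via ?):

    #include <cstdint>

    // Simplified sketch of the MTBUF SI encoding above.
    uint64_t encodeMTBUF_SI(uint16_t offset12, bool offen, bool idxen, bool glc,
                            bool addr64, uint8_t op3, uint8_t dfmt4, uint8_t nfmt3,
                            uint8_t vaddr, uint8_t vdata, uint8_t srsrc5,
                            bool slc, bool tfe, uint8_t soffset) {
      uint64_t Inst = 0;
      Inst |= uint64_t(offset12 & 0xfff);       // Inst{11-0}  offset
      Inst |= uint64_t(offen)       << 12;      // Inst{12}    offen
      Inst |= uint64_t(idxen)       << 13;      // Inst{13}    idxen
      Inst |= uint64_t(glc)         << 14;      // Inst{14}    glc
      Inst |= uint64_t(addr64)      << 15;      // Inst{15}    addr64
      Inst |= uint64_t(op3 & 0x7)   << 16;      // Inst{18-16} op
      Inst |= uint64_t(dfmt4 & 0xf) << 19;      // Inst{22-19} dfmt
      Inst |= uint64_t(nfmt3 & 0x7) << 23;      // Inst{25-23} nfmt
      Inst |= uint64_t(0x3a)        << 26;      // Inst{31-26} encoding
      Inst |= uint64_t(vaddr)       << 32;      // Inst{39-32} vaddr
      Inst |= uint64_t(vdata)       << 40;      // Inst{47-40} vdata
      Inst |= uint64_t(srsrc5 & 0x1f) << 48;    // Inst{52-48} srsrc{6-2}
      Inst |= uint64_t(slc)         << 54;      // Inst{54}    slc
      Inst |= uint64_t(tfe)         << 55;      // Inst{55}    tfe
      Inst |= uint64_t(soffset)     << 56;      // Inst{63-56} soffset
      return Inst;
    }
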
//===----------------------------------------------------------------------===//
// CI
@@ -1350,16 +1582,39 @@ def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
MTBUF_Real<ps>,
+ Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
let AssemblerPredicate=isVI;
let DecoderNamespace="VI";
+ let Inst{11-0} = !if(ps.has_offset, offset, ?);
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
+ let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
+ let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{31-26} = 0x3a; // encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>;
-def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>;
-def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>;
-def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>;
-def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>;
+multiclass MTBUF_Real_AllAddr_vi<bits<4> op> {
+ def _OFFSET_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _OFFEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>;
+//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <5>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>;
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 88c92b9582fd0..04308fb3aaf64 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -49,6 +49,17 @@ addOperand(MCInst &Inst, const MCOperand& Opnd) {
MCDisassembler::SoftFail;
}
+static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
+ uint16_t NameIdx) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
+ if (OpIdx != -1) {
+ auto I = MI.begin();
+ std::advance(I, OpIdx);
+ MI.insert(I, Op);
+ }
+ return OpIdx;
+}
+
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
@@ -106,12 +117,12 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}
-#define DECODE_SDWA9(DecName) \
-DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName)
+#define DECODE_SDWA(DecName) \
+DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
-DECODE_SDWA9(Src32)
-DECODE_SDWA9(Src16)
-DECODE_SDWA9(VopcDst)
+DECODE_SDWA(Src32)
+DECODE_SDWA(Src16)
+DECODE_SDWA(VopcDst)
#include "AMDGPUGenDisassemblerTables.inc"
@@ -149,6 +160,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
raw_ostream &WS,
raw_ostream &CS) const {
CommentStream = &CS;
+ bool IsSDWA = false;
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
@@ -170,10 +182,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res) break;
Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
- if (Res) break;
+ if (Res) { IsSDWA = true; break; }
Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
- if (Res) break;
+ if (Res) { IsSDWA = true; break; }
}
// Reinitialize Bytes as DPP64 could have eaten too much
@@ -200,17 +212,36 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi)) {
// Insert dummy unused src2_modifiers.
- int Src2ModIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::src2_modifiers);
- auto I = MI.begin();
- std::advance(I, Src2ModIdx);
- MI.insert(I, MCOperand::createImm(0));
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src2_modifiers);
}
+ if (Res && IsSDWA)
+ Res = convertSDWAInst(MI);
+
Size = Res ? (MaxInstBytesNum - Bytes.size()) : 0;
return Res;
}
+DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
+ if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
+ // VOPC - insert clamp
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
+ } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
+ int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
+ if (SDst != -1) {
+ // VOPC - insert VCC register as sdst
+ insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC),
+ AMDGPU::OpName::sdst);
+ } else {
+ // VOP1/2 - insert omod if present in instruction
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
+ }
+ }
+ return MCDisassembler::Success;
+}
+
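Both the src2_modifiers fix-up and convertSDWAInst rely on insertNamedMCOperand splicing an operand in at the index reported by the named-operand table. The essential behavior, sketched against a plain std::vector (a hypothetical stand-in for MCInst and the table lookup):

    #include <cassert>
    #include <vector>

    // Hypothetical stand-in: insert Val so it lands at position OpIdx, the
    // way insertNamedMCOperand places an operand at its named index.
    static int insertAt(std::vector<int> &Ops, int Val, int OpIdx) {
      if (OpIdx != -1)
        Ops.insert(Ops.begin() + OpIdx, Val);
      return OpIdx;
    }

    int main() {
      std::vector<int> Ops = {10, 30}; // operand missing at index 1
      insertAt(Ops, 20, 1);            // e.g. a dummy src2_modifiers imm
      assert(Ops[1] == 20 && Ops.size() == 3);
    }
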
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
return getContext().getRegisterInfo()->
getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -524,8 +555,6 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
return createSRegOperand(getTtmpClassId(Width), Val - TTMP_MIN);
}
- assert(Width == OPW16 || Width == OPW32 || Width == OPW64);
-
if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
return decodeIntImmed(Val);
@@ -592,36 +621,43 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
-MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width,
- unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
+ unsigned Val) const {
using namespace AMDGPU::SDWA;
- if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
- Val <= SDWA9EncValues::SRC_VGPR_MAX) {
- return createRegOperand(getVgprClassId(Width),
- Val - SDWA9EncValues::SRC_VGPR_MIN);
- }
- if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
- Val <= SDWA9EncValues::SRC_SGPR_MAX) {
- return createSRegOperand(getSgprClassId(Width),
- Val - SDWA9EncValues::SRC_SGPR_MIN);
- }
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
+ if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
+ Val <= SDWA9EncValues::SRC_VGPR_MAX) {
+ return createRegOperand(getVgprClassId(Width),
+ Val - SDWA9EncValues::SRC_VGPR_MIN);
+ }
+ if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
+ Val <= SDWA9EncValues::SRC_SGPR_MAX) {
+ return createSRegOperand(getSgprClassId(Width),
+ Val - SDWA9EncValues::SRC_SGPR_MIN);
+ }
- return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN);
+ return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN);
+ } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
+ return createRegOperand(getVgprClassId(Width), Val);
+ }
+ llvm_unreachable("unsupported target");
}
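
On GFX9 the SDWA src field is a single code split into ranges: VGPRs first, then SGPRs, then the special registers. A sketch of that range split with placeholder constants (the real values live in SDWA9EncValues and may differ):

    #include <cassert>

    // Placeholder constants standing in for SDWA9EncValues.
    enum : unsigned { SRC_VGPR_MAX = 255, SRC_SGPR_MIN = 256, SRC_SGPR_MAX = 357 };

    enum class OpClass { VGPR, SGPR, Special };

    static OpClass classify(unsigned Val, unsigned &RegNo) {
      if (Val <= SRC_VGPR_MAX) { RegNo = Val; return OpClass::VGPR; }
      if (Val <= SRC_SGPR_MAX) { RegNo = Val - SRC_SGPR_MIN; return OpClass::SGPR; }
      RegNo = Val - SRC_SGPR_MIN; // remaining codes map to special registers
      return OpClass::Special;
    }

    int main() {
      unsigned R;
      assert(classify(3, R) == OpClass::VGPR && R == 3);
      assert(classify(SRC_SGPR_MIN + 5, R) == OpClass::SGPR && R == 5);
    }
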
-MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const {
- return decodeSDWA9Src(OPW16, Val);
+MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
+ return decodeSDWASrc(OPW16, Val);
}
-MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const {
- return decodeSDWA9Src(OPW32, Val);
+MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
+ return decodeSDWASrc(OPW32, Val);
}
-MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
using namespace AMDGPU::SDWA;
+ assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
+ "SDWAVopcDst should be present only on GFX9");
if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
if (Val > AMDGPU::EncValues::SGPR_MAX) {
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 5fa3cf1a223fa..3d71db909e20d 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -65,6 +65,8 @@ public:
uint64_t Inst,
uint64_t Address) const;
+ DecodeStatus convertSDWAInst(MCInst &MI) const;
+
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
@@ -105,10 +107,10 @@ public:
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
- MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const;
- MCOperand decodeSDWA9Src16(unsigned Val) const;
- MCOperand decodeSDWA9Src32(unsigned Val) const;
- MCOperand decodeSDWA9VopcDst(unsigned Val) const;
+ MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeSDWASrc16(unsigned Val) const;
+ MCOperand decodeSDWASrc32(unsigned Val) const;
+ MCOperand decodeSDWAVopcDst(unsigned Val) const;
};
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td
index 98eda288bcacb..edca6fcd812c8 100644
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -31,8 +31,6 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
let VM_CNT = 1;
let LGKM_CNT = 1;
- let Uses = [EXEC, FLAT_SCR]; // M0
-
let UseNamedOperandTable = 1;
let hasSideEffects = 0;
let SchedRW = [WriteVMEM];
@@ -40,10 +38,16 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
string Mnemonic = opName;
string AsmOperands = asmOps;
+ bits<1> is_flat_global = 0;
+ bits<1> is_flat_scratch = 0;
+
bits<1> has_vdst = 1;
bits<1> has_data = 1;
bits<1> has_glc = 1;
bits<1> glcValue = 0;
+
+ // TODO: Also add M0 to Uses if the instruction could access LDS (pre-gfx9 only?).
+ let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);
}
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
@@ -68,7 +72,10 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
// Only valid on gfx9
bits<1> lds = 0; // XXX - What does this actually do?
- bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
+
+ // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
+ bits<2> seg = !if(ps.is_flat_global, 0b10,
+ !if(ps.is_flat_scratch, 0b01, 0));
// Signed offset. Highest bit ignored for flat and treated as 12-bit
// unsigned for flat accesses.
@@ -81,7 +88,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
// Only valid on GFX9+
let Inst{12-0} = offset;
let Inst{13} = lds;
- let Inst{15-14} = 0;
+ let Inst{15-14} = seg;
let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
let Inst{17} = slc;
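
The seg selector above reduces to a two-bit value per pseudo; as a quick sketch:

    #include <cassert>
    #include <cstdint>

    // Mirrors the !if chain above: 00 = flat, 01 = scratch, 10 = global.
    static uint8_t flatSegBits(bool IsGlobal, bool IsScratch) {
      return IsGlobal ? 0b10 : (IsScratch ? 0b01 : 0b00);
    }

    int main() {
      assert(flatSegBits(false, false) == 0b00); // flat
      assert(flatSegBits(false, true)  == 0b01); // scratch
      assert(flatSegBits(true,  false) == 0b10); // global
    }
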
@@ -106,6 +113,16 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
let mayLoad = 1;
}
+class FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> :
+ FLAT_Load_Pseudo<opName, regClass, 1> {
+ let is_flat_global = 1;
+}
+
+class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> :
+ FLAT_Load_Pseudo<opName, regClass, 1> {
+ let is_flat_scratch = 1;
+}
+
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
bit HasSignedOffset = 0> : FLAT_Pseudo<
opName,
@@ -119,6 +136,16 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
let has_vdst = 0;
}
+class FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> :
+ FLAT_Store_Pseudo<opName, regClass, 1> {
+ let is_flat_global = 1;
+}
+
+class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> :
+ FLAT_Store_Pseudo<opName, regClass, 1> {
+ let is_flat_scratch = 1;
+}
+
multiclass FLAT_Atomic_Pseudo<
string opName,
RegisterClass vdst_rc,
@@ -306,6 +333,26 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
} // End SubtargetPredicate = isCI
+let SubtargetPredicate = HasFlatGlobalInsts in {
+def GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
+def GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
+def GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
+def GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
+def GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
+def GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
+def GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
+def GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
+
+def GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
+def GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
+def GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
+def GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
+def GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
+def GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
+
+} // End SubtargetPredicate = HasFlatGlobalInsts
+
+
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
@@ -557,3 +604,18 @@ defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
+def GLOBAL_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, GLOBAL_LOAD_UBYTE>;
+def GLOBAL_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, GLOBAL_LOAD_SBYTE>;
+def GLOBAL_LOAD_USHORT_vi : FLAT_Real_vi <0x12, GLOBAL_LOAD_USHORT>;
+def GLOBAL_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, GLOBAL_LOAD_SSHORT>;
+def GLOBAL_LOAD_DWORD_vi : FLAT_Real_vi <0x14, GLOBAL_LOAD_DWORD>;
+def GLOBAL_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, GLOBAL_LOAD_DWORDX2>;
+def GLOBAL_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, GLOBAL_LOAD_DWORDX4>;
+def GLOBAL_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, GLOBAL_LOAD_DWORDX3>;
+
+def GLOBAL_STORE_BYTE_vi : FLAT_Real_vi <0x18, GLOBAL_STORE_BYTE>;
+def GLOBAL_STORE_SHORT_vi : FLAT_Real_vi <0x1a, GLOBAL_STORE_SHORT>;
+def GLOBAL_STORE_DWORD_vi : FLAT_Real_vi <0x1c, GLOBAL_STORE_DWORD>;
+def GLOBAL_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, GLOBAL_STORE_DWORDX2>;
+def GLOBAL_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, GLOBAL_STORE_DWORDX4>;
+def GLOBAL_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, GLOBAL_STORE_DWORDX3>;
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index b84640230eeeb..7c31c8e397ba7 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -72,6 +72,11 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
}
+void AMDGPUInstPrinter::printS16ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatDec(static_cast<int16_t>(MI->getOperand(OpNo).getImm()));
+}
+
void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -118,6 +123,16 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint16_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << ((OpNo == 0) ? "offset:" : " offset:");
+ printS16ImmDecOperand(MI, OpNo, O);
+ }
+}
+
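printOffsetS13 assumes the 13-bit signed offset has already been widened to a sign-correct 16-bit value before the int16_t cast in printS16ImmDecOperand. A minimal sketch of that widening step (SignExtend13 is a hypothetical helper, not part of the patch; arithmetic right shift assumed):

    #include <cassert>
    #include <cstdint>

    // Hypothetical helper: widen a raw 13-bit field to int16_t using the
    // usual shift-pair sign-extension idiom.
    static int16_t SignExtend13(uint16_t Raw13) {
      return int16_t(Raw13 << 3) >> 3; // move sign bit 12 up to bit 15
    }

    int main() {
      assert(SignExtend13(0x1fff) == -1);    // all-ones 13-bit value
      assert(SignExtend13(0x1000) == -4096); // most negative offset
      assert(SignExtend13(0x0fff) == 4095);  // most positive offset
    }
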
void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -216,6 +231,24 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
O << " vm";
}
+void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " dfmt:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " nfmt:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
const MCRegisterInfo &MRI) {
switch (RegNo) {
@@ -379,7 +412,6 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
- assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
printImmediate16(Lo16, STI, O);
}
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index c8094c4b840a1..7bbf99a85f409 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -42,6 +42,7 @@ private:
void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
@@ -52,6 +53,9 @@ private:
void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printOffsetS13(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+
void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printOffset1(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -84,6 +88,10 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpVM(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 0a9c2b94c1eee..2b408ff10caae 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -30,14 +30,9 @@ public:
unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; };
- void processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
@@ -102,36 +97,11 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
}
-void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
- MCValue Res;
-
- // When we have complex expressions like: BB0_1 + (BB0_2 - 4), which are
- // used for long branches, this function will be called with
- // IsResolved = false and Value set to some pre-computed value. In
- // the example above, the value would be:
- // (BB0_1 + (BB0_2 - 4)) - CurrentOffsetFromStartOfFunction.
- // This is not what we want. We just want the expression computation
- // only. The reason the MC layer subtracts the current offset from the
- // expression is because the fixup is of kind FK_PCRel_4.
- // For these scenarios, evaluateAsValue gives us the computation that we
- // want.
- if (!IsResolved && Fixup.getValue()->evaluateAsValue(Res, Layout) &&
- Res.isAbsolute()) {
- Value = Res.getConstant();
- IsResolved = true;
-
- }
- if (IsResolved)
- Value = adjustFixupValue(Fixup, Value, &Asm.getContext());
-}
-
-void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
+ Value = adjustFixupValue(Fixup, Value, &Asm.getContext());
if (!Value)
return; // Doesn't change encoding.
@@ -142,7 +112,7 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
uint32_t Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the bits from
// the fixup value.
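
The loop that follows this comment in the file ORs the adjusted value into the instruction bytes one byte at a time; the idiom, as a self-contained sketch:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch of the masking idiom: OR each byte of the adjusted value into
    // the fragment data, little-endian, starting at the fixup's offset.
    static void patchBytes(std::vector<uint8_t> &Data, uint32_t Offset,
                           uint64_t Value, unsigned NumBytes) {
      assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
      for (unsigned i = 0; i != NumBytes; ++i)
        Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
    }

    int main() {
      std::vector<uint8_t> Frag(8, 0);
      patchBytes(Frag, 4, 0x11223344, 4);
      assert(Frag[4] == 0x44 && Frag[7] == 0x11);
    }
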
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index a856b17a228f0..1b062064ace1c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -52,15 +52,15 @@ public:
return 0;
}
- virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
return 0;
}
- virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
return 0;
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index e02acf516c0db..376c9bfe5ccf2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -70,13 +70,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
- unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
- unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
};
} // end anonymous namespace
@@ -252,9 +252,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
- assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
uint32_t Encoding = getLit16Encoding(Lo16, STI);
- assert(Encoding != 255 && "packed constants can only be inline immediates");
return Encoding;
}
default:
@@ -328,11 +326,11 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned
-SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
using namespace AMDGPU::SDWA;
-
+
uint64_t RegEnc = 0;
const MCOperand &MO = MI.getOperand(OpNo);
@@ -347,9 +345,9 @@ SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned
-SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
using namespace AMDGPU::SDWA;
uint64_t RegEnc = 0;
@@ -365,6 +363,25 @@ SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
return RegEnc;
}
+static bool needsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ case MCExpr::SymbolRef:
+ return true;
+ case MCExpr::Binary: {
+ auto *BE = cast<MCBinaryExpr>(Expr);
+ if (BE->getOpcode() == MCBinaryExpr::Sub)
+ return false;
+ return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
+ }
+ case MCExpr::Unary:
+ return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return false;
+ }
+ llvm_unreachable("invalid kind");
+}
+
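needsPCRel classifies an expression tree by a simple recursion: any symbol reference makes the fixup PC-relative unless it sits under a subtraction, since the Sym - .Ltmp shape already encodes a relative distance. The same recursion over a toy expression type (illustrative types only):

    #include <cassert>

    // Toy mirror of the MCExpr walk in needsPCRel above.
    struct Expr {
      enum Kind { Symbol, Constant, Add, Sub } K;
      const Expr *LHS = nullptr, *RHS = nullptr;
    };

    static bool needsPCRelToy(const Expr *E) {
      switch (E->K) {
      case Expr::Symbol:   return true;  // bare symbol: PC-relative fixup
      case Expr::Constant: return false;
      case Expr::Sub:      return false; // A - B already encodes a distance
      case Expr::Add:      return needsPCRelToy(E->LHS) || needsPCRelToy(E->RHS);
      }
      return false;
    }

    int main() {
      Expr Sym{Expr::Symbol}, C{Expr::Constant};
      Expr Plus{Expr::Add, &Sym, &C}; // sym + 16     -> FK_PCRel_4
      Expr Diff{Expr::Sub, &Sym, &C}; // sym - .Ltmp1 -> FK_Data_4
      assert(needsPCRelToy(&Plus) && !needsPCRelToy(&Diff));
    }
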
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -373,12 +390,21 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
return MRI.getEncodingValue(MO.getReg());
if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
- const auto *Expr = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
+ // FIXME: Whether this expression is PCRel or not should not depend on
+ // what the expression looks like. Given that this is just a general
+ // expression, it should probably be FK_Data_4 and whatever is producing
+ //
+ // s_add_u32 s2, s2, (extern_const_addrspace+16)
+ //
+ // and expecting a PCRel fixup should instead produce
+ //
+ // .Ltmp1:
+ // s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
MCFixupKind Kind;
- if (Expr && Expr->getSymbol().isExternal())
- Kind = FK_Data_4;
- else
+ if (needsPCRel(MO.getExpr()))
Kind = FK_PCRel_4;
+ else
+ Kind = FK_Data_4;
Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc()));
}
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index f6f2582aa11b3..d30d1d382588c 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -80,7 +80,7 @@ def : Proc<"cayman", R600_VLIW4_Itin,
// Southern Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"gfx600", SIFullSpeedModel,
+def : ProcessorModel<"gfx600", SIFullSpeedModel,
[FeatureISAVersion6_0_0]>;
def : ProcessorModel<"SI", SIFullSpeedModel,
@@ -95,7 +95,7 @@ def : ProcessorModel<"gfx601", SIQuarterSpeedModel,
[FeatureISAVersion6_0_1]
>;
-def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
+def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
[FeatureISAVersion6_0_1]>;
def : ProcessorModel<"verde", SIQuarterSpeedModel,
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index c55878f8bff0f..215791f4f92dd 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -584,23 +584,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return LowerImplicitParameter(DAG, VT, DL, 8);
case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_X, VT);
+ return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_X, VT);
case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Y, VT);
+ return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Y, VT);
case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Z, VT);
+ return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Z, VT);
case Intrinsic::r600_read_tidig_x:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_X, VT);
+ return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_X, VT);
case Intrinsic::r600_read_tidig_y:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Y, VT);
+ return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Y, VT);
case Intrinsic::r600_read_tidig_z:
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Z, VT);
+ return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Z, VT);
case Intrinsic::r600_recipsqrt_ieee:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index 5cd90323ff67b..3915c0e5bdbed 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -118,9 +118,9 @@ namespace AMDGPU {
// Operand for source modifiers for VOP instructions
OPERAND_INPUT_MODS,
- // Operand for GFX9 SDWA instructions
- OPERAND_SDWA9_SRC,
- OPERAND_SDWA9_VOPC_DST,
+ // Operand for SDWA instructions
+ OPERAND_SDWA_SRC,
+ OPERAND_SDWA_VOPC_DST,
/// Operand with 32-bit immediate that uses the constant bus.
OPERAND_KIMM32,
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 5f5f25103c027..0a795c99f94e5 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -174,6 +174,31 @@ static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}
+static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII) {
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ auto &Src = MI.getOperand(1);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = Src.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg))
+ return false;
+
+ for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
+ const auto *UseMI = MO.getParent();
+ if (UseMI == &MI)
+ continue;
+ if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
+ UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
+ !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
+ return false;
+ }
+ // Change VGPR to SGPR destination.
+ MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
+ return true;
+}
+
// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
// SGPRx = ...
@@ -214,6 +239,9 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
return false;
+ if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
+ return true;
+
// TODO: Could have multiple extracts?
unsigned SubReg = CopyUse.getOperand(1).getSubReg();
if (SubReg != AMDGPU::NoSubRegister)
@@ -563,6 +591,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
break;
}
TII->moveToVALU(MI);
+ } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
+ tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
}
break;
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index e10f1ed3762e8..f391f67a241f1 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -13,6 +13,7 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -166,6 +167,8 @@ static bool updateOperand(FoldCandidate &Fold,
if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
TargetRegisterInfo::isVirtualRegister(New->getReg())) {
Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
+
+ Old.setIsUndef(New->isUndef());
return true;
}
@@ -470,7 +473,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
return &Op;
MachineInstr *Def = MRI.getVRegDef(Op.getReg());
- if (Def->isMoveImmediate()) {
+ if (Def && Def->isMoveImmediate()) {
MachineOperand &ImmSrc = Def->getOperand(1);
if (ImmSrc.isImm())
return &ImmSrc;
@@ -921,12 +924,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// level.
bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ for (I = MBB->begin(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index b1bd14e421f02..08a64de385018 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -284,7 +284,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
+ if (ST.isAmdCodeObjectV2(MF)) {
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
@@ -363,14 +363,14 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
- if (MFI->hasPrivateMemoryInputPtr()) {
+ if (MFI->hasImplicitBufferPtr()) {
unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
BuildMI(MBB, I, DL, Mov64, Rsrc01)
- .addReg(PreloadedPrivateBufferReg)
+ .addReg(MFI->getImplicitBufferPtrUserSGPR())
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
} else {
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
@@ -385,7 +385,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
MachineMemOperand::MODereferenceable,
0, 0);
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
- .addReg(PreloadedPrivateBufferReg)
+ .addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
.addImm(0) // glc
.addMemOperand(MMO)
@@ -417,14 +417,69 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
void SIFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- if (MFI->isEntryFunction())
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ if (FuncInfo->isEntryFunction()) {
emitEntryFunctionPrologue(MF, MBB);
+ return;
+ }
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+ unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
+
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL;
+
+ bool NeedFP = hasFP(MF);
+ if (NeedFP) {
+ // If we need a frame pointer, set it up here. It's whatever the value of
+ // the stack pointer is at this point. Any variable size objects will be
+ // allocated after this, so we can still use the frame pointer to reference
+ // locals.
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
+ .addReg(StackPtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ uint32_t NumBytes = MFI.getStackSize();
+ if (NumBytes != 0 && hasSP(MF)) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
+ .addReg(StackPtrReg)
+ .addImm(NumBytes * ST.getWavefrontSize())
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ if (FuncInfo->isEntryFunction())
+ return;
+ unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+ if (StackPtrReg == AMDGPU::NoRegister)
+ return;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ uint32_t NumBytes = MFI.getStackSize();
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc DL;
+
+ // FIXME: Clarify the distinction between having no SP set and having one.
+ // For callee functions, it's really whether we need SP to be accurate or not.
+
+ if (NumBytes != 0 && hasSP(MF)) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
+ .addReg(StackPtrReg)
+ .addImm(NumBytes * ST.getWavefrontSize())
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
}
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
@@ -557,3 +612,19 @@ void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
}
}
+
+bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
+ // All stack operations are relative to the frame offset SGPR.
+ // TODO: We still want to eliminate the frame pointer in some cases.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // XXX - Is this only called after frame is finalized? Should be able to check
+ // frame size.
+ return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
+}
+
+bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
+ // All stack operations are relative to the frame offset SGPR.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MFI.hasCalls() || MFI.hasVarSizedObjects();
+}
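
Note the scaling in both the prologue and epilogue: the stack pointer is a per-wave byte offset into scratch, so a frame of NumBytes per lane advances SP by NumBytes times the wavefront size. A worked example under the usual 64-lane assumption:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Per-lane frame size scaled by the wavefront size (64 lanes assumed
      // here, as on GCN targets).
      uint32_t NumBytes = 16;                      // one lane's frame
      uint32_t WavefrontSize = 64;
      uint32_t SPDelta = NumBytes * WavefrontSize; // S_ADD_U32 amount
      assert(SPDelta == 1024);                     // undone by S_SUB_U32
    }
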
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index e17adbe273614..d4dfa1c7eaa86 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -60,6 +60,10 @@ private:
/// \brief Emits debugger prologue.
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+public:
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasSP(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 441f1ef4bd04c..d0f4e00994de1 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -211,6 +211,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UADDO, MVT::i32, Legal);
setOperationAction(ISD::USUBO, MVT::i32, Legal);
+ setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
+ setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
@@ -471,6 +474,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
}
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ADDCARRY);
+ setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::SUBCARRY);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
@@ -1061,10 +1068,10 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
- if (Info.hasPrivateMemoryInputPtr()) {
- unsigned PrivateMemoryPtrReg = Info.addPrivateMemoryPtr(TRI);
- MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SGPR_64RegClass);
- CCInfo.AllocateReg(PrivateMemoryPtrReg);
+ if (Info.hasImplicitBufferPtr()) {
+ unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
+ MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
@@ -1227,7 +1234,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
}
}
- if (NeedSP){
+ if (NeedSP) {
unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF);
Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
@@ -2998,7 +3005,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_implicit_buffer_ptr: {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ if (getSubtarget()->isAmdCodeObjectV2(MF))
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
+
+ unsigned Reg = TRI->getPreloadedValue(MF,
+ SIRegisterInfo::IMPLICIT_BUFFER_PTR);
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
}
case Intrinsic::amdgcn_dispatch_ptr:
@@ -3288,6 +3299,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec: {
@@ -3313,7 +3326,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // glc
Op.getOperand(6) // slc
};
- MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
@@ -3328,6 +3340,29 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
}
+ case Intrinsic::amdgcn_tbuffer_load: {
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // voffset
+ Op.getOperand(5), // soffset
+ Op.getOperand(6), // offset
+ Op.getOperand(7), // dfmt
+ Op.getOperand(8), // nfmt
+ Op.getOperand(9), // glc
+ Op.getOperand(10) // slc
+ };
+
+ EVT VT = Op.getOperand(2).getValueType();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad,
+ VT.getStoreSize(), VT.getStoreSize());
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
// Basic sample.
case Intrinsic::amdgcn_image_sample:
case Intrinsic::amdgcn_image_sample_cl:
@@ -3393,10 +3428,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: {
@@ -3463,33 +3498,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
Op.getOperand(2), Op.getOperand(3));
}
- case AMDGPUIntrinsic::SI_tbuffer_store: {
- SDValue Ops[] = {
- Chain,
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4),
- Op.getOperand(5),
- Op.getOperand(6),
- Op.getOperand(7),
- Op.getOperand(8),
- Op.getOperand(9),
- Op.getOperand(10),
- Op.getOperand(11),
- Op.getOperand(12),
- Op.getOperand(13),
- Op.getOperand(14)
- };
-
- EVT VT = Op.getOperand(3).getValueType();
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
case AMDGPUIntrinsic::AMDGPU_kill: {
SDValue Src = Op.getOperand(2);
if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
@@ -3505,7 +3513,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
- const MachineFunction &MF = DAG.getMachineFunction();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
if (WGSize <= ST.getWavefrontSize())
@@ -3514,6 +3521,75 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
return SDValue();
};
+ case AMDGPUIntrinsic::SI_tbuffer_store: {
+
+ // Extract vindex and voffset from vaddr as appropriate
+ const ConstantSDNode *OffEn = cast<ConstantSDNode>(Op.getOperand(10));
+ const ConstantSDNode *IdxEn = cast<ConstantSDNode>(Op.getOperand(11));
+ SDValue VAddr = Op.getOperand(5);
+
+ SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
+
+ assert(!(OffEn->isOne() && IdxEn->isOne()) &&
+ "Legacy intrinsic doesn't support both offset and index - use new version");
+
+ SDValue VIndex = IdxEn->isOne() ? VAddr : Zero;
+ SDValue VOffset = OffEn->isOne() ? VAddr : Zero;
+
+ // Deal with the vec-3 case
+ const ConstantSDNode *NumChannels = cast<ConstantSDNode>(Op.getOperand(4));
+ auto Opcode = NumChannels->getZExtValue() == 3 ?
+ AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
+
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(3), // vdata
+ Op.getOperand(2), // rsrc
+ VIndex,
+ VOffset,
+ Op.getOperand(6), // soffset
+ Op.getOperand(7), // inst_offset
+ Op.getOperand(8), // dfmt
+ Op.getOperand(9), // nfmt
+ Op.getOperand(12), // glc
+ Op.getOperand(13), // slc
+ };
+
+ assert((cast<ConstantSDNode>(Op.getOperand(14)))->getZExtValue() == 0 &&
+ "Value of tfe other than zero is unsupported");
+
+ EVT VT = Op.getOperand(3).getValueType();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(Opcode, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+
+ case Intrinsic::amdgcn_tbuffer_store: {
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Op.getOperand(5), // voffset
+ Op.getOperand(6), // soffset
+ Op.getOperand(7), // offset
+ Op.getOperand(8), // dfmt
+ Op.getOperand(9), // nfmt
+ Op.getOperand(10), // glc
+ Op.getOperand(11) // slc
+ };
+ EVT VT = Op.getOperand(3).getValueType();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+
default:
return Op;
}
@@ -4839,6 +4915,103 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
return 0;
}
+SDValue SITargetLowering::performAddCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // add x, zext (setcc) => addcarry x, 0, setcc
+ // add x, sext (setcc) => subcarry x, 0, setcc
+ unsigned Opc = LHS.getOpcode();
+ if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
+ Opc == ISD::ANY_EXTEND || Opc == ISD::ADDCARRY)
+ std::swap(RHS, LHS);
+
+ Opc = RHS.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND: {
+ auto Cond = RHS.getOperand(0);
+ if (Cond.getOpcode() != ISD::SETCC &&
+ Cond.getOpcode() != AMDGPUISD::FP_CLASS)
+ break;
+ SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
+ SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
+ Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
+ return DAG.getNode(Opc, SL, VTList, Args);
+ }
+ case ISD::ADDCARRY: {
+ // add x, (addcarry y, 0, cc) => addcarry x, y, cc
+ auto C = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if (!C || C->getZExtValue() != 0) break;
+ SDValue Args[] = { LHS, RHS.getOperand(0), RHS.getOperand(2) };
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), RHS->getVTList(), Args);
+ }
+ }
+ return SDValue();
+}
+
+SDValue SITargetLowering::performSubCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ unsigned Opc = LHS.getOpcode();
+ if (Opc != ISD::SUBCARRY)
+ std::swap(RHS, LHS);
+
+ if (LHS.getOpcode() == ISD::SUBCARRY) {
+ // sub (subcarry x, 0, cc), y => subcarry x, y, cc
+ auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ if (!C || C->getZExtValue() != 0)
+ return SDValue();
+ SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
+ return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
+ }
+ return SDValue();
+}
+
+SDValue SITargetLowering::performAddCarrySubCarryCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+
+ if (N->getValueType(0) != MVT::i32)
+ return SDValue();
+
+ auto C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C || C->getZExtValue() != 0)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue LHS = N->getOperand(0);
+
+ // addcarry (add x, y), 0, cc => addcarry x, y, cc
+ // subcarry (sub x, y), 0, cc => subcarry x, y, cc
+ unsigned LHSOpc = LHS.getOpcode();
+ unsigned Opc = N->getOpcode();
+ if ((LHSOpc == ISD::ADD && Opc == ISD::ADDCARRY) ||
+ (LHSOpc == ISD::SUB && Opc == ISD::SUBCARRY)) {
+ SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2) };
+ return DAG.getNode(Opc, SDLoc(N), N->getVTList(), Args);
+ }
+ return SDValue();
+}
+
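The three combines rest on plain 32-bit carry arithmetic: x + zext(cc) is an add with carry-in cc, and x + sext(cc) is a subtract with borrow cc, since sext(true) is -1. A quick check of those identities (a standalone sketch, not DAG code):

    #include <cassert>
    #include <cstdint>

    // addcarry/subcarry as plain 32-bit arithmetic with a carry/borrow out.
    static uint32_t addcarry(uint32_t X, uint32_t Y, bool CIn, bool &COut) {
      uint64_t R = uint64_t(X) + Y + CIn;
      COut = (R >> 32) != 0;
      return uint32_t(R);
    }
    static uint32_t subcarry(uint32_t X, uint32_t Y, bool BIn, bool &BOut) {
      uint64_t R = uint64_t(X) - Y - BIn;
      BOut = (R >> 32) != 0; // wrapped below zero
      return uint32_t(R);
    }

    int main() {
      bool Out;
      for (bool CC : {false, true}) {
        uint32_t X = 0xdeadbeef;
        uint32_t ZExt = CC ? 1u : 0u;
        uint32_t SExt = CC ? 0xffffffffu : 0u;
        assert(X + ZExt == addcarry(X, 0, CC, Out)); // add x, zext(cc)
        assert(X + SExt == subcarry(X, 0, CC, Out)); // add x, sext(cc)
      }
    }
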
SDValue SITargetLowering::performFAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
@@ -5009,6 +5182,13 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default:
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ case ISD::ADD:
+ return performAddCombine(N, DCI);
+ case ISD::SUB:
+ return performSubCombine(N, DCI);
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY:
+ return performAddCarrySubCarryCombine(N, DCI);
case ISD::FADD:
return performFAddCombine(N, DCI);
case ISD::FSUB:
@@ -5425,15 +5605,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
}
-SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const {
- SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT);
-
- return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
- cast<RegisterSDNode>(VReg)->getReg(), VT);
-}
-
//===----------------------------------------------------------------------===//
// SI Inline Assembly Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 8e2ec40b224cd..24f88e632d38e 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -108,6 +108,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
+ SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -216,8 +219,6 @@ public:
void AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const override;
- SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const override;
SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL,
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1097814e99ce2..c9b48fea7225e 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2108,7 +2108,9 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint8_t OperandType) const {
- if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET)
+ if (!MO.isImm() ||
+ OperandType < AMDGPU::OPERAND_SRC_FIRST ||
+ OperandType > AMDGPU::OPERAND_SRC_LAST)
return false;
// MachineOperand provides no way to tell the true operand size, since it only
@@ -2433,8 +2435,73 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // Verify SDWA
+ if (isSDWA(MI)) {
+
+ if (!ST.hasSDWA()) {
+ ErrInfo = "SDWA is not supported on this target";
+ return false;
+ }
+
+ int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
+ if (DstIdx == -1)
+ DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst);
+
+ const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1)
+ continue;
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+
+ if (!ST.hasSDWAScalar()) {
+ // Only VGPRs on VI
+ if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
+ ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
+ return false;
+ }
+ } else {
+ // No immediates on GFX9
+ if (!MO.isReg()) {
+ ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
+ return false;
+ }
+ }
+ }
+
+ if (!ST.hasSDWAOmod()) {
+ // No omod allowed on VI
+ const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod != nullptr &&
+ (!OMod->isImm() || OMod->getImm() != 0)) {
+ ErrInfo = "OMod not allowed in SDWA instructions on VI";
+ return false;
+ }
+ }
+
+ uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
+ if (isVOPC(BasicOpcode)) {
+ if (!ST.hasSDWASdst() && DstIdx != -1) {
+ // Only vcc allowed as dst on VI for VOPC
+ const MachineOperand &Dst = MI.getOperand(DstIdx);
+ if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
+ ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
+ return false;
+ }
+ } else if (!ST.hasSDWAClampVOPC()) {
+ // No clamp allowed on GFX9 for VOPC
+ const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
+ if (Clamp != nullptr &&
+ (!Clamp->isImm() || Clamp->getImm() != 0)) {
+ ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
+ return false;
+ }
+ }
+ }
+ }
+
// Verify VOP*
- if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) {
+ if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) {
// Only look at the true operands. Only a real operand can use the constant
// bus, and we don't want to check pseudo-operands like the source modifier
// flags.
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index f6e5e8883f63c..74b48c7618087 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -814,6 +814,9 @@ namespace AMDGPU {
int getSDWAOp(uint16_t Opcode);
LLVM_READONLY
+ int getBasicFromSDWAOp(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteRev(uint16_t Opcode);
LLVM_READONLY
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 470a47b024433..3b4a8b5d1e817 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -20,6 +20,8 @@ def SIEncodingFamily {
int NONE = -1;
int SI = 0;
int VI = 1;
+ int SDWA = 2;
+ int SDWA9 = 3;
}
//===----------------------------------------------------------------------===//
@@ -39,25 +41,41 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
- SDTypeProfile<0, 13,
- [SDTCisVT<0, v4i32>, // rsrc(SGPR)
- SDTCisVT<1, iAny>, // vdata(VGPR)
- SDTCisVT<2, i32>, // num_channels(imm)
- SDTCisVT<3, i32>, // vaddr(VGPR)
+def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT",
+ SDTypeProfile<1, 9,
+ [ // vdata
+ SDTCisVT<1, v4i32>, // rsrc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
- SDTCisVT<5, i32>, // inst_offset(imm)
+ SDTCisVT<5, i32>, // offset(imm)
SDTCisVT<6, i32>, // dfmt(imm)
SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // offen(imm)
- SDTCisVT<9, i32>, // idxen(imm)
- SDTCisVT<10, i32>, // glc(imm)
- SDTCisVT<11, i32>, // slc(imm)
- SDTCisVT<12, i32> // tfe(imm)
+ SDTCisVT<8, i32>, // glc(imm)
+ SDTCisVT<9, i32> // slc(imm)
]>,
- [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;
+def SDTtbuffer_store : SDTypeProfile<0, 10,
+ [ // vdata
+ SDTCisVT<1, v4i32>, // rsrc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // dfmt(imm)
+ SDTCisVT<7, i32>, // nfmt(imm)
+ SDTCisVT<8, i32>, // glc(imm)
+ SDTCisVT<9, i32> // slc(imm)
+ ]>;
+
+def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3",
+ SDTtbuffer_store,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+
def SDTBufferLoad : SDTypeProfile<1, 5,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
@@ -452,25 +470,25 @@ def ExpSrc3 : RegisterOperand<VGPR_32> {
let ParserMatchClass = VReg32OrOffClass;
}
-class SDWA9Src : RegisterOperand<VS_32> {
+class SDWASrc : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_SDWA9_SRC";
- let EncoderMethod = "getSDWA9SrcEncoding";
+ let OperandType = "OPERAND_SDWA_SRC";
+ let EncoderMethod = "getSDWASrcEncoding";
}
-def SDWA9Src32 : SDWA9Src {
- let DecoderMethod = "decodeSDWA9Src32";
+def SDWASrc32 : SDWASrc {
+ let DecoderMethod = "decodeSDWASrc32";
}
-def SDWA9Src16 : SDWA9Src {
- let DecoderMethod = "decodeSDWA9Src16";
+def SDWASrc16 : SDWASrc {
+ let DecoderMethod = "decodeSDWASrc16";
}
-def SDWA9VopcDst : VOPDstOperand<SReg_64> {
+def SDWAVopcDst : VOPDstOperand<SReg_64> {
let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_SDWA9_VOPC_DST";
- let EncoderMethod = "getSDWA9VopcDstEncoding";
- let DecoderMethod = "decodeSDWA9VopcDst";
+ let OperandType = "OPERAND_SDWA_VOPC_DST";
+ let EncoderMethod = "getSDWAVopcDstEncoding";
+ let DecoderMethod = "decodeSDWAVopcDst";
}
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
@@ -525,7 +543,7 @@ def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>;
-def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>;
+def offset_s13 : NamedOperandS13<"OffsetS13", NamedMatchClass<"OffsetS13">>;
def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
@@ -545,6 +563,9 @@ def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
+def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>;
+def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>;
+
def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
@@ -634,13 +655,13 @@ class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass>
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
-def FPRegInputModsMatchClass : AsmOperandClass {
- let Name = "RegWithFPInputMods";
+def FPRegSDWAInputModsMatchClass : AsmOperandClass {
+ let Name = "SDWARegWithFPInputMods";
let ParserMethod = "parseRegWithFPInputMods";
- let PredicateMethod = "isRegKind";
+ let PredicateMethod = "isSDWARegKind";
}
-def FPRegInputMods : InputMods <FPRegInputModsMatchClass> {
+def FPRegSDWAInputMods : InputMods <FPRegSDWAInputModsMatchClass> {
let PrintMethod = "printOperandAndFPInputMods";
}
@@ -655,13 +676,13 @@ def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
}
-def IntRegInputModsMatchClass : AsmOperandClass {
- let Name = "RegWithIntInputMods";
+def IntRegSDWAInputModsMatchClass : AsmOperandClass {
+ let Name = "SDWARegWithIntInputMods";
let ParserMethod = "parseRegWithIntInputMods";
- let PredicateMethod = "isRegKind";
+ let PredicateMethod = "isSDWARegKind";
}
-def IntRegInputMods : InputMods <IntRegInputModsMatchClass> {
+def IntRegSDWAInputMods : InputMods <IntRegSDWAInputModsMatchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
@@ -851,10 +872,10 @@ class getVALUDstForVT<ValueType VT> {
}
// Returns the register class to use for the destination of VOP[12C]
-// instructions with GFX9 SDWA extension
-class getSDWA9DstForVT<ValueType VT> {
+// instructions with SDWA extension
+class getSDWADstForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 1),
- SDWA9VopcDst, // VOPC
+ SDWAVopcDst, // VOPC
VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}
@@ -898,8 +919,8 @@ class getVregSrcForVT<ValueType VT> {
!if(!eq(VT.Size, 64), VReg_64, VGPR_32));
}
-class getSDWA9SrcForVT <ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32);
+class getSDWASrcForVT <ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 16), SDWASrc16, SDWASrc32);
}
// Returns the register class to use for sources of VOP3 instructions for the
@@ -995,7 +1016,7 @@ class getSrcMod <ValueType VT> {
);
}
-// Return type of input modifiers operand specified input operand for SDWA/DPP
+// Returns the input modifiers operand type for the given input operand, for DPP
class getSrcModExt <ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
@@ -1004,13 +1025,13 @@ class getSrcModExt <ValueType VT> {
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
-// Return type of input modifiers operand specified input operand for SDWA 9
-class getSrcModSDWA9 <ValueType VT> {
+// Returns the input modifiers operand type for the given input operand, for SDWA
+class getSrcModSDWA <ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
- Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods);
+ Operand ret = !if(isFP, FPRegSDWAInputMods, IntRegSDWAInputMods);
}
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
@@ -1141,36 +1162,12 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
/* endif */)));
}
-class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
- bit HasFloatModifiers, Operand Src0Mod, Operand Src1Mod,
- ValueType DstVT> {
- dag ret = !if(!eq(NumSrcArgs, 0),
- // VOP1 without input operands (V_NOP)
- (ins),
- !if(!eq(NumSrcArgs, 1),
- // VOP1_SDWA
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel),
- !if(!eq(NumSrcArgs, 2),
- !if(!eq(DstVT.Size, 1),
- // VOPC_SDWA with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel)),
- (ins)/* endif */)));
-}
-// Ins for GFX9 SDWA
-class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
- bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
- ValueType DstVT> {
+// Ins for SDWA
+class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
+ bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
+ ValueType DstVT> {
dag ret = !if(!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
@@ -1178,31 +1175,31 @@ class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArg
!if(!eq(NumSrcArgs, 1),
// VOP1
!if(!eq(HasSDWAOMod, 0),
- // VOP1_SDWA9 without omod
+ // VOP1_SDWA without omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod:$clamp,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel),
- // VOP1_SDWA9 with omod
+ // VOP1_SDWA with omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod:$clamp, omod:$omod,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel)),
!if(!eq(NumSrcArgs, 2),
!if(!eq(DstVT.Size, 1),
- // VOPC_SDWA9
+ // VOPC_SDWA
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA9
+ clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
+ // VOP2_SDWA
!if(!eq(HasSDWAOMod, 0),
- // VOP2_SDWA9 without omod
+ // VOP2_SDWA without omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP1_SDWA9 with omod
+ // VOP2_SDWA with omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, omod:$omod,
@@ -1220,12 +1217,12 @@ class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
(outs)); // V_NOP
}
-// Outs for GFX9 SDWA
-class getOutsSDWA9 <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA9> {
+// Outs for SDWA
+class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
dag ret = !if(HasDst,
!if(!eq(DstVT.Size, 1),
- (outs DstRCSDWA9:$sdst),
- (outs DstRCSDWA9:$vdst)),
+ (outs DstRCSDWA:$sdst),
+ (outs DstRCSDWA:$vdst)),
(outs)); // V_NOP
}
@@ -1387,8 +1384,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src2VT = ArgVT[3];
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
- field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret;
- field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT<DstVT>.ret;
+ field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -1396,19 +1392,15 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
- field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
- field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
- field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT<Src0VT>.ret;
- field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT<Src0VT>.ret;
+ field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
+ field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
field Operand Src0Mod = getSrcMod<Src0VT>.ret;
field Operand Src1Mod = getSrcMod<Src1VT>.ret;
field Operand Src2Mod = getSrcMod<Src2VT>.ret;
field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
- field Operand Src0ModSDWA = getSrcModExt<Src0VT>.ret;
- field Operand Src1ModSDWA = getSrcModExt<Src1VT>.ret;
- field Operand Src0ModSDWA9 = getSrcModSDWA9<Src0VT>.ret;
- field Operand Src1ModSDWA9 = getSrcModSDWA9<Src1VT>.ret;
+ field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
+ field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
@@ -1457,8 +1449,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Outs32 = Outs;
field dag Outs64 = Outs;
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
- field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCSDWA>.ret;
- field dag OutsSDWA9 = getOutsSDWA9<HasDst, DstVT, DstRCSDWA9>.ret;
+ field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
@@ -1471,11 +1462,9 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
- HasModifiers, Src0ModSDWA, Src1ModSDWA,
+ HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
- field dag InsSDWA9 = getInsSDWA9<Src0SDWA9, Src1SDWA9, NumSrcArgs,
- HasSDWAOMod, Src0ModSDWA9, Src1ModSDWA9,
- DstVT>.ret;
+
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
@@ -1628,13 +1617,13 @@ def getSDWAOp : InstrMapping {
let ValueCols = [["SDWA"]];
}
-// Maps ordinary instructions to their SDWA GFX9 counterparts
-def getSDWA9Op : InstrMapping {
+// Maps SDWA instructions to their ordinary counterparts
+def getBasicFromSDWAOp : InstrMapping {
let FilterClass = "VOP";
let RowFields = ["OpName"];
let ColFields = ["AsmVariantName"];
- let KeyCol = ["Default"];
- let ValueCols = [["SDWA9"]];
+ let KeyCol = ["SDWA"];
+ let ValueCols = [["Default"]];
}
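
Since the key and value columns are swapped relative to getSDWAOp, the two
generated tables are inverses of each other. A usage sketch (the opcode pair is
illustrative; any VOP with both a Default and an SDWA variant behaves the same
way):

    int Sdwa  = AMDGPU::getSDWAOp(AMDGPU::V_ADD_F16_e32);  // -> V_ADD_F16_sdwa
    int Basic = AMDGPU::getBasicFromSDWAOp(Sdwa);          // -> V_ADD_F16_e32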
def getMaskedMIMGOp : InstrMapping {
@@ -1669,7 +1658,9 @@ def getMCOpcodeGen : InstrMapping {
let ColFields = ["Subtarget"];
let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
- [!cast<string>(SIEncodingFamily.VI)]];
+ [!cast<string>(SIEncodingFamily.VI)],
+ [!cast<string>(SIEncodingFamily.SDWA)],
+ [!cast<string>(SIEncodingFamily.SDWA9)]];
}
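
With the two added columns a pseudo can now resolve to up to four MC opcodes,
one per encoding family: the plain SI and VI encodings plus the VI (SDWA) and
GFX9 (SDWA9) SDWA encodings, whose microcode words differ. Resolution goes
through the generated table, roughly as below (a sketch: the call site is
SIInstrInfo's pseudo-to-MC lowering, and the C++ enum is assumed to mirror the
TableGen SIEncodingFamily values):

    // Pick the encoding family for the subtarget, then look up the MC opcode;
    // the table yields a negative value when no such encoding exists.
    unsigned Gen = IsGFX9 ? SIEncodingFamily::SDWA9 : SIEncodingFamily::SDWA;
    int MCOp = AMDGPU::getMCOpcode(PseudoOpc, Gen);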
// Get equivalent SOPK instruction.
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 18b197ddb7ae7..3203c38dae344 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -74,7 +74,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false),
- PrivateMemoryInputPtr(false) {
+ ImplicitBufferPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
@@ -86,6 +86,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
ScratchWaveOffsetReg = AMDGPU::SGPR4;
FrameOffsetReg = AMDGPU::SGPR5;
+ StackPtrOffsetReg = AMDGPU::SGPR32;
return;
}
@@ -150,7 +151,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
DispatchID = true;
} else if (ST.isMesaGfxShader(MF)) {
if (HasStackObjects || MaySpill)
- PrivateMemoryInputPtr = true;
+ ImplicitBufferPtr = true;
}
// We don't need to worry about accessing spills with flat instructions.
@@ -203,11 +204,11 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return FlatScratchInitUserSGPR;
}
-unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
- PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
+ ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
NumUserSGPRs += 2;
- return PrivateMemoryPtrUserSGPR;
+ return ImplicitBufferPtrUserSGPR;
}
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9fdb8caac6f21..05aa249584bf1 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -97,7 +97,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned StackPtrOffsetReg;
// Input registers for non-HSA ABI
- unsigned PrivateMemoryPtrUserSGPR;
+ unsigned ImplicitBufferPtrUserSGPR;
// Input registers setup for the HSA ABI.
// User SGPRs in allocation order.
@@ -179,7 +179,7 @@ private:
// Private memory buffer
// Compute directly in sgpr[0:1]
// Other shaders indirect 64-bits at sgpr[0:1]
- bool PrivateMemoryInputPtr : 1;
+ bool ImplicitBufferPtr : 1;
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
@@ -236,7 +236,7 @@ public:
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
unsigned addDispatchID(const SIRegisterInfo &TRI);
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
- unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
+ unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
unsigned addWorkGroupIDX() {
@@ -341,8 +341,8 @@ public:
return WorkItemIDZ;
}
- bool hasPrivateMemoryInputPtr() const {
- return PrivateMemoryInputPtr;
+ bool hasImplicitBufferPtr() const {
+ return ImplicitBufferPtr;
}
unsigned getNumUserSGPRs() const {
@@ -396,8 +396,8 @@ public:
return QueuePtrUserSGPR;
}
- unsigned getPrivateMemoryPtrUserSGPR() const {
- return PrivateMemoryPtrUserSGPR;
+ unsigned getImplicitBufferPtrUserSGPR() const {
+ return ImplicitBufferPtrUserSGPR;
}
bool hasSpilledSGPRs() const {
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index f4ddf1891683b..4ac23ef03cb32 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -67,9 +67,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void matchSDWAOperands(MachineFunction &MF);
- bool isConvertibleToSDWA(const MachineInstr &MI) const;
+ bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
- void legalizeScalarOperands(MachineInstr &MI) const;
+ void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const;
StringRef getPassName() const override { return "SI Peephole SDWA"; }
@@ -224,7 +224,7 @@ static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
static bool isSubregOf(const MachineOperand &SubReg,
const MachineOperand &SuperReg,
const TargetRegisterInfo *TRI) {
-
+
if (!SuperReg.isReg() || !SubReg.isReg())
return false;
@@ -557,7 +557,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
+
if (TRI->isPhysicalRegister(Src0->getReg()) ||
TRI->isPhysicalRegister(Dst->getReg()))
break;
@@ -590,7 +590,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
break;
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
+
if (TRI->isPhysicalRegister(Src1->getReg()) ||
TRI->isPhysicalRegister(Dst->getReg()))
break;
@@ -607,16 +607,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
}
}
-bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
+bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
+ const SISubtarget &ST) const {
// Check if this instruction has opcode that supports SDWA
- unsigned Opc = MI.getOpcode();
- if (AMDGPU::getSDWAOp(Opc) != -1)
- return true;
- int Opc32 = AMDGPU::getVOPe32(Opc);
- if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
- return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
- !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
- return false;
+ int Opc = MI.getOpcode();
+ if (AMDGPU::getSDWAOp(Opc) == -1)
+ Opc = AMDGPU::getVOPe32(Opc);
+
+ if (Opc == -1 || AMDGPU::getSDWAOp(Opc) == -1)
+ return false;
+
+ if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return false;
+
+ if (TII->isVOPC(Opc)) {
+ if (!ST.hasSDWASdst()) {
+ const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ if (SDst && SDst->getReg() != AMDGPU::VCC)
+ return false;
+ }
+
+ if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return false;
+
+ } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+ return false;
+ }
+
+ if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
+ Opc == AMDGPU::V_MAC_F32_e32))
+ return false;
+
+ return true;
}
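
The rewritten check also changes the fallback order: if the 64-bit opcode has
no SDWA counterpart, the 32-bit (e32) form is tried before giving up, and only
then are the per-target restrictions applied. For example, V_MAC_F32_e32 still
converts on VI but is rejected on GFX9, where hasSDWAMac() is false because the
MAC opcodes cannot be used with SDWA there.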
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
@@ -641,6 +663,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
+ } else {
+ Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ assert(Dst &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+ SDWAInst.add(*Dst);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -677,9 +704,23 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst.add(*Src2);
}
- // Initialize clamp.
+ // Copy clamp if present, initialize otherwise
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
- SDWAInst.addImm(0);
+ MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
+ if (Clamp) {
+ SDWAInst.add(*Clamp);
+ } else {
+ SDWAInst.addImm(0);
+ }
+
+ // Copy omod if present, initialize otherwise if needed
+ MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod) {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
+ SDWAInst.add(*OMod);
+ } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
+ SDWAInst.addImm(0);
+ }
// Initialize dst_sel and dst_unused if present
if (Dst) {
@@ -733,16 +774,25 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
}
// If an instruction was converted to SDWA it should not have immediates or SGPR
-// operands. Copy its scalar operands into VGPRs.
-void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const {
+// operands (one SGPR is allowed on GFX9). Copy its scalar operands into VGPRs.
+void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
+                                            const SISubtarget &ST) const {
const MCInstrDesc &Desc = TII->get(MI.getOpcode());
- for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
+ unsigned ConstantBusCount = 0;
+ for (MachineOperand &Op : MI.explicit_uses()) {
if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
continue;
+
+ unsigned I = MI.getOperandNo(&Op);
if (Desc.OpInfo[I].RegClass == -1 ||
!TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
continue;
+
+ if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
+ TRI->isSGPRReg(*MRI, Op.getReg())) {
+ ++ConstantBusCount;
+ continue;
+ }
+
unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
@@ -758,22 +808,20 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const {
bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- if (!ST.hasSDWA() ||
- !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9
+ if (!ST.hasSDWA())
return false;
- }
MRI = &MF.getRegInfo();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
-
+
// Find all SDWA operands in MF.
matchSDWAOperands(MF);
for (const auto &OperandPair : SDWAOperands) {
const auto &Operand = OperandPair.second;
MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
- if (PotentialMI && isConvertibleToSDWA(*PotentialMI)) {
+ if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
PotentialMatches[PotentialMI].push_back(Operand.get());
}
}
@@ -788,7 +836,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
bool Ret = !ConvertedInstructions.empty();
while (!ConvertedInstructions.empty())
- legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
+ legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
return Ret;
}
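
On GFX9 a single SGPR source may use the constant bus, so legalizeScalarOperands
now lets the first SGPR operand of a converted instruction through and only
copies the remaining scalar operands (on VI, all of them) into VGPRs. On VI the
rewrite comes out roughly like this (registers illustrative):

    // before legalization (illegal on VI: SGPR source in an SDWA instruction)
    //   v1 = V_ADD_F16_sdwa ..., s2, v0, ...
    // after
    //   v5 = V_MOV_B32_e32 s2
    //   v1 = V_ADD_F16_sdwa ..., v5, v0, ...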
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b611f28fcabdf..ef6ad4ad0c8f3 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1044,18 +1044,29 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned CarryOut
= MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
unsigned ScaledReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- // XXX - Should this use a vector shift?
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
- .addReg(DiffReg, RegState::Kill)
- .addImm(Log2_32(ST.getWavefrontSize()));
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
+ .addImm(Log2_32(ST.getWavefrontSize()))
+ .addReg(DiffReg, RegState::Kill);
// TODO: Fold if use instruction is another add of a constant.
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
- .addReg(CarryOut, RegState::Define | RegState::Dead)
- .addImm(Offset)
- .addReg(ScaledReg, RegState::Kill);
+ if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
+ .addReg(CarryOut, RegState::Define | RegState::Dead)
+ .addImm(Offset)
+ .addReg(ScaledReg, RegState::Kill);
+ } else {
+ unsigned ConstOffsetReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
+ .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
+ .addReg(CarryOut, RegState::Define | RegState::Dead)
+ .addReg(ConstOffsetReg, RegState::Kill)
+ .addReg(ScaledReg, RegState::Kill);
+ }
MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
}
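
The new split exists because the VOP3 form of V_ADD_I32 can fold an inline
constant but not an arbitrary 32-bit literal. Assuming the usual VALU
inline-constant range (small integers around -16..64 plus a few FP bit
patterns), the two paths come out as:

    // Offset == 16 (inlinable):
    //   v_lshrrev_b32 v1, 6, vDiff     ; wavefront size 64 -> shift by 6
    //   v_add_i32     v0, vcc, 16, v1
    // Offset == 0x12345 (not inlinable):
    //   v_lshrrev_b32 v1, 6, vDiff
    //   s_mov_b32     s4, 0x12345
    //   v_add_i32     v0, vcc, s4, v1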
@@ -1341,12 +1352,11 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
- if (ST.isAmdCodeObjectV2(MF)) {
- assert(MFI->hasPrivateSegmentBuffer());
- return MFI->PrivateSegmentBufferUserSGPR;
- }
- assert(MFI->hasPrivateMemoryInputPtr());
- return MFI->PrivateMemoryPtrUserSGPR;
+ assert(MFI->hasPrivateSegmentBuffer());
+ return MFI->PrivateSegmentBufferUserSGPR;
+ case SIRegisterInfo::IMPLICIT_BUFFER_PTR:
+ assert(MFI->hasImplicitBufferPtr());
+ return MFI->ImplicitBufferPtrUserSGPR;
case SIRegisterInfo::KERNARG_SEGMENT_PTR:
assert(MFI->hasKernargSegmentPtr());
return MFI->KernargSegmentPtrUserSGPR;
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 8fed6d5f9710f..600cc886cb595 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -197,12 +197,13 @@ public:
WORKGROUP_ID_Y = 11,
WORKGROUP_ID_Z = 12,
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
+ IMPLICIT_BUFFER_PTR = 15,
// VGPRS:
- FIRST_VGPR_VALUE = 15,
+ FIRST_VGPR_VALUE = 16,
WORKITEM_ID_X = FIRST_VGPR_VALUE,
- WORKITEM_ID_Y = 16,
- WORKITEM_ID_Z = 17
+ WORKITEM_ID_Y = 17,
+ WORKITEM_ID_Z = 18
};
/// \brief Returns the physical register that \p Value is stored in.
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c5f121757e623..96a18544f02ac 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -92,6 +92,8 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
case AMDGPU::V_ADDC_U32_e64:
case AMDGPU::V_SUBB_U32_e64:
+ if (TII->getNamedOperand(MI, AMDGPU::OpName::src1)->isImm())
+ return false;
// Additional verification is needed for sdst/src2.
return true;
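
The early bail-out is needed because in the VOP2 (e32) form of V_ADDC/V_SUBB
the carry-in is implicitly VCC and the src1 field only encodes a VGPR, so an
instruction shrunk with an immediate src1 would be unencodable.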
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index f581e69980c79..26515b27bb77d 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -538,6 +538,27 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
Reg == AMDGPU::SCC;
}
+bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
+ if (Reg0 == Reg1) {
+ return true;
+ }
+
+ unsigned SubReg0 = TRI->getSubReg(Reg0, 1);
+ if (SubReg0 == 0) {
+ return TRI->getSubRegIndex(Reg1, Reg0) > 0;
+ }
+
+ for (unsigned Idx = 2; SubReg0 > 0; ++Idx) {
+ if (isRegIntersect(Reg1, SubReg0, TRI)) {
+ return true;
+ }
+ SubReg0 = TRI->getSubReg(Reg0, Idx);
+ }
+
+ return false;
+}
+
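
The helper recurses through Reg0's sub-registers, so overlap is found in either
direction and at any nesting depth. Illustrative expectations:

    // A 64-bit pair overlaps each of its halves, in both argument orders:
    //   isRegIntersect(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR1, TRI) == true
    //   isRegIntersect(AMDGPU::SGPR1, AMDGPU::SGPR0_SGPR1, TRI) == true
    // Disjoint registers do not:
    //   isRegIntersect(AMDGPU::SGPR0, AMDGPU::SGPR2, TRI) == false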
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
switch(Reg) {
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index eff0230d21f57..936e4921a7097 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -271,6 +271,9 @@ bool isGFX9(const MCSubtargetInfo &STI);
/// \brief Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
+/// \brief Is there any intersection between registers
+bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
+
/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index 95b5ef0a49dba..96b33c373f052 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -93,11 +93,6 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP1";
}
-class VOP1_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
- VOP_SDWA9_Pseudo <OpName, P, pattern> {
- let AsmMatchConverter = "cvtSdwaVOP1";
-}
-
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
@@ -117,7 +112,6 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
- def _sdwa9 : VOP1_SDWA9_Pseudo <opName, P>;
}
// Special profile for instructions which have clamp
@@ -274,12 +268,10 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+
let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel);
- let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
let Asm64 = getAsm64<1, 1, 0, 1>.ret;
@@ -545,8 +537,8 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9")>,
- VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+ VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
// For now left dpp only for asm/dasm
// TODO: add corresponding pseudo
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index 657cacaa792ca..7b9bc71ad4c77 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -114,11 +114,6 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP2";
}
-class VOP2_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
- VOP_SDWA9_Pseudo <OpName, P, pattern> {
- let AsmMatchConverter = "cvtSdwaVOP2";
-}
-
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
@@ -139,7 +134,6 @@ multiclass VOP2Inst <string opName,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
- def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P>;
}
multiclass VOP2bInst <string opName,
@@ -156,10 +150,6 @@ multiclass VOP2bInst <string opName,
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
-
- def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P> {
- let AsmMatchConverter = "cvtSdwaVOP2b";
- }
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -221,17 +211,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
VGPR_32:$src2, // stub argument
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
- VGPR_32:$src2, // stub argument
- clampmod:$clamp, omod:$omod,
- dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
@@ -289,15 +275,10 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
- clampmod:$clamp, omod:$omod,
- dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
-
let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
Src1Mod:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
@@ -326,6 +307,8 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
+ let HasExt = 0;
+ let HasSDWA9 = 0;
}
def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
@@ -335,6 +318,8 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
+ let HasExt = 0;
+ let HasSDWA9 = 0;
}
//===----------------------------------------------------------------------===//
@@ -397,20 +382,29 @@ def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">;
} // End isConvergent = 1
-defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
-defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>;
-defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
-defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
-defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
-defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
-defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
-defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, AMDGPUpkrtz_f16_f32>;
-defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
-defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;
+defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
+defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
+defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
+defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
+defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>>;
} // End SubtargetPredicate = isGCN
+def : Pat<
+ (AMDGPUadde i32:$src0, i32:$src1, i1:$src2),
+ (V_ADDC_U32_e64 $src0, $src1, $src2)
+>;
+
+def : Pat<
+ (AMDGPUsube i32:$src0, i32:$src1, i1:$src2),
+ (V_SUBB_U32_e64 $src0, $src1, $src2)
+>;
// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {
@@ -728,8 +722,8 @@ multiclass VOP2_SDWA_Real <bits<6> op> {
multiclass VOP2_SDWA9_Real <bits<6> op> {
def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9")>,
- VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+ VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP2be_Real_e32e64_vi <bits<6> op> :
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index cd347b86d3050..f3482a22d5dcd 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -113,11 +113,6 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOPC";
}
-class VOPC_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
- VOP_SDWA9_Pseudo <OpName, P, pattern> {
- let AsmMatchConverter = "cvtSdwaVOPC";
-}
-
// This class is used only with VOPC instructions. Use $sdst for out operand
class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
@@ -189,13 +184,6 @@ multiclass VOPC_Pseudos <string opName,
let isConvergent = DefExec;
let isCompare = 1;
}
-
- def _sdwa9 : VOPC_SDWA9_Pseudo <opName, P> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let SchedRW = P.Schedule;
- let isConvergent = DefExec;
- let isCompare = 1;
- }
}
def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
@@ -540,14 +528,12 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
VOPC_Profile<sched, vt, i32> {
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
let Asm64 = "$sdst, $src0_modifiers, $src1";
+
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
+
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
- //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
@@ -580,12 +566,6 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> {
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
-
- def _sdwa9 : VOPC_SDWA9_Pseudo <opName, p> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let SchedRW = p.Schedule;
- let isConvergent = DefExec;
- }
}
def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>;
@@ -954,8 +934,8 @@ multiclass VOPC_Real_vi <bits<10> op> {
VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9")>,
- VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+ VOP_SDWA9_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
!cast<Instruction>(NAME#"_e32_vi")> {
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 4da654f84f9d1..e386f21c2ba49 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -232,11 +232,11 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
- let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
- let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
- let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
+ let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
+ let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
+ let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)
- let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2)
+ let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, 0); // op_sel_hi(2)
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
@@ -245,8 +245,8 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
let Inst{40-32} = !if(P.HasSrc0, src0, 0);
let Inst{49-41} = !if(P.HasSrc1, src1, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
- let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel_hi(0)
- let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1)
+ let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, 0); // op_sel_hi(0)
+ let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, 0); // op_sel_hi(1)
let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
@@ -300,6 +300,19 @@ class VOP_SDWAe<VOPProfile P> : Enc64 {
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
}
+// GFX9 adds two features to SDWA:
+// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD.
+// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather
+// than VGPRs (at most 1 can be an SGPR);
+// b. OMOD is the standard output modifier (result *2, *4, /2)
+// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This
+// replaces OMOD and the dest fields with SD and SDST (SGPR destination)
+// field.
+// a. When SD=1, the SDST is used as the destination for the compare result;
+// b. When SD=0, VCC is used.
+//
+// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA
+
// gfx9 SDWA basic encoding
class VOP_SDWA9e<VOPProfile P> : Enc64 {
bits<9> src0; // {src0_sgpr{0}, src0{7-0}}
@@ -353,6 +366,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
string Mnemonic = opName;
string AsmOperands = P.AsmSDWA;
+ string AsmOperands9 = P.AsmSDWA9;
let Size = 8;
let mayLoad = 0;
@@ -372,53 +386,9 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
VOPProfile Pfl = P;
}
-// GFX9 adds two features to SDWA:
-// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD.
-// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather
-// than VGPRs (at most 1 can be an SGPR);
-// b. OMOD is the standard output modifier (result *2, *4, /2)
-// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This
-// replaces OMOD and the dest fields with SD and SDST (SGPR destination)
-// field.
-// a. When SD=1, the SDST is used as the destination for the compare result;
-// b.when SD=0, VCC is used.
-//
-// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA
-
-class VOP_SDWA9_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
- InstSI <P.OutsSDWA9, P.InsSDWA9, "", pattern>,
- VOP <opName>,
- SIMCInstr <opName#"_sdwa9", SIEncodingFamily.NONE>,
- MnemonicAlias <opName#"_sdwa9", opName> {
-
- let isPseudo = 1;
- let isCodeGenOnly = 1;
- let UseNamedOperandTable = 1;
-
- string Mnemonic = opName;
- string AsmOperands = P.AsmSDWA9;
-
- let Size = 8;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-
- let VALU = 1;
- let SDWA = 1;
- let Uses = [EXEC];
-
- let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst);
- let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst);
- let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9,
- AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA9";
-
- VOPProfile Pfl = P;
-}
-
class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -443,9 +413,9 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
let TSFlags = ps.TSFlags;
}
-class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands9, []>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -458,13 +428,15 @@ class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
+ let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
+ let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
+ let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9,
+ AMDGPUAsmVariants.Disable);
+ let DecoderNamespace = "SDWA9";
+
// Copy relevant pseudo op flags
- let SubtargetPredicate = ps.SubtargetPredicate;
- let AssemblerPredicate = ps.AssemblerPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
- let AsmVariantName = ps.AsmVariantName;
let UseNamedOperandTable = ps.UseNamedOperandTable;
- let DecoderNamespace = ps.DecoderNamespace;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index ca68f5d42c32c..6f67183df6a18 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -100,7 +100,8 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable Reliability, Availability and Serviceability extensions">;
def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
"Enable fast computation of positive address offsets">;
-
+def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
// Cyclone has preferred instructions for zeroing VFP registers, which can
// execute in 0 cycles.
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index f9da036c7e468..90f635c812542 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1504,6 +1504,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::CONSTPOOL_ENTRY: {
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
+
/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8715657ad5e25..e0810c358f2da 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -665,12 +665,14 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
const ARMFunctionInfo *AFI =
MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
+ // In their ARM encoding, they can't be encoded in a conditional form.
+ if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
+
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
return isV8EligibleForIT(&MI);
- } else { // non-Thumb
- if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
- return false;
}
return true;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5b2d093e8f0da..2bcc707e9fc3c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2669,12 +2669,35 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
-static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
+
+ // When generating execute-only code Constant Pools must be promoted to the
+ // global data section. It's a bit ugly that we can't share them across basic
+ // blocks, but this way we guarantee that execute-only behaves correct with
+ // position-independent addressing modes.
+ if (Subtarget->genExecuteOnly()) {
+ auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ auto T = const_cast<Type*>(CP->getType());
+ auto C = const_cast<Constant*>(CP->getConstVal());
+ auto M = const_cast<Module*>(DAG.getMachineFunction().
+ getFunction()->getParent());
+ auto GV = new GlobalVariable(
+ *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
+ Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
+ Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
+ Twine(AFI->createPICLabelUId())
+ );
+ SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
+ dl, PtrVT);
+ return LowerGlobalAddress(GA, DAG);
+ }
+
if (CP->isMachineConstantPoolEntry())
Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
CP->getAlignment());
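
Each promoted entry becomes an internal read-only global whose name is built
from the private global prefix, "CP", the function number, and a fresh PIC
label id, so on ELF the pool entry of, say, function #2 gets a name like
.LCP2_0 (illustrative) and is then reached through the ordinary
LowerGlobalAddress path instead of a PC-relative literal load.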
@@ -3118,6 +3141,19 @@ static bool isReadOnly(const GlobalValue *GV) {
isa<Function>(GV);
}
+SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Subtarget->getTargetTriple().getObjectFormat()) {
+ default: llvm_unreachable("unknown object format");
+ case Triple::COFF:
+ return LowerGlobalAddressWindows(Op, DAG);
+ case Triple::ELF:
+ return LowerGlobalAddressELF(Op, DAG);
+ case Triple::MachO:
+ return LowerGlobalAddressDarwin(Op, DAG);
+ }
+}
+
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -7634,21 +7670,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
- case ISD::ConstantPool:
- if (Subtarget->genExecuteOnly())
- llvm_unreachable("execute-only should not generate constant pools");
- return LowerConstantPool(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::GlobalAddress:
- switch (Subtarget->getTargetTriple().getObjectFormat()) {
- default: llvm_unreachable("unknown object format");
- case Triple::COFF:
- return LowerGlobalAddressWindows(Op, DAG);
- case Triple::ELF:
- return LowerGlobalAddressELF(Op, DAG);
- case Triple::MachO:
- return LowerGlobalAddressDarwin(Op, DAG);
- }
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 26da528c19e6d..5044134f5b1e2 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -601,6 +601,8 @@ class InstrItineraryData;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 45471a4e95b39..53db5acbe805c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4756,6 +4756,16 @@ def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
(ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+// Aliases for the above with the .w qualifier
+def : t2InstAlias<"mov${p}.w $Rd, $shift",
+ (t2MOVsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def : t2InstAlias<"movs${p}.w $Rd, $shift",
+ (t2MOVSsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def : t2InstAlias<"mov${p}.w $Rd, $shift",
+ (t2MOVsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def : t2InstAlias<"movs${p}.w $Rd, $shift",
+ (t2MOVSsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
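
With these aliases in place, width-qualified spellings such as
"mov.w r0, r1, lsl #2" or "movs.w r0, r1, asr #3" now match the same
shifted-move pseudos as their unqualified forms instead of being rejected.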
// ADR w/o the .w suffix
def : t2InstAlias<"adr${p} $Rd, $addr",
(t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 2ae3bad4076b0..4cb0eca5ee5f8 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -42,6 +42,10 @@ public:
private:
bool selectImpl(MachineInstr &I) const;
+ bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const;
+
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
const ARMBaseTargetMachine &TM;
@@ -243,6 +247,105 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
return Opc;
}
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ auto &MBB = *MIB->getParent();
+ auto InsertBefore = std::next(MIB->getIterator());
+ auto &DebugLoc = MIB->getDebugLoc();
+
+ // Move 0 into the result register.
+ auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi))
+ .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass))
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI))
+ return false;
+
+ // Perform the comparison.
+ auto LHSReg = MIB->getOperand(2).getReg();
+ auto RHSReg = MIB->getOperand(3).getReg();
+ assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) &&
+ MRI.getType(LHSReg).getSizeInBits() == 32 &&
+ MRI.getType(RHSReg).getSizeInBits() == 32 &&
+ "Unsupported types for comparison operation");
+ auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr))
+ .addUse(LHSReg)
+ .addUse(RHSReg)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Move 1 into the result register if the flags say so.
+ auto ResReg = MIB->getOperand(0).getReg();
+ auto Cond =
+ static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate());
+ auto ARMCond = getComparePred(Cond);
+ if (ARMCond == ARMCC::AL)
+ return false;
+
+ auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi))
+ .addDef(ResReg)
+ .addUse(Mov0I->getOperand(0).getReg())
+ .addImm(1)
+ .add(predOps(ARMCond, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
+ return false;
+
+ MIB->eraseFromParent();
+ return true;
+}
+
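For orientation, this is roughly the shape of what selectICmp builds for res = (lhs == rhs) (an informal sketch, not verified output; the register names are made up):

    // MOVi   tmp, 0            ; seed the result with 0
    // CMPrr  lhs, rhs          ; set the CPSR flags
    // MOVCCi res, tmp, 1, EQ   ; overwrite with 1 only if EQ holds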
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -343,6 +446,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
+ case G_ICMP:
+ return selectICmp(MIB, TII, MRI, TRI, RBI);
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index a706079d98662..5873c7fb38729 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -86,6 +86,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_CONSTANT, s32}, Legal);
+ setAction({G_ICMP, s1}, Legal);
+ for (auto Ty : {s8, s16})
+ setAction({G_ICMP, 1, Ty}, WidenScalar);
+ for (auto Ty : {s32, p0})
+ setAction({G_ICMP, 1, Ty}, Legal);
+
if (!ST.useSoftFloat() && ST.hasVFP2()) {
setAction({G_FADD, s32}, Legal);
setAction({G_FADD, s64}, Legal);
diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp
new file mode 100644
index 0000000000000..1b6e97c28d453
--- /dev/null
+++ b/lib/Target/ARM/ARMMacroFusion.cpp
@@ -0,0 +1,57 @@
+//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMacroFusion.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+/// \brief Check if the instruction pair, FirstMI and SecondMI, should be
+/// fused together. When FirstMI is unspecified, check whether SecondMI may
+/// be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
+
+ // Assume wildcards for unspecified instrs.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
+
+ if (ST.hasFuseAES())
+ // Fuse AES crypto operations.
+ switch (SecondOpcode) {
+ // AES encode.
+ case ARM::AESMC:
+ return FirstOpcode == ARM::AESE ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ // AES decode.
+ case ARM::AESIMC:
+ return FirstOpcode == ARM::AESD ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ }
+
+ return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation() {
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
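A usage sketch (hypothetical call sites; only the function and opcode names above are from this patch). The generic mutation first probes with a null FirstMI to ask whether SecondMI can fuse with any predecessor, then evaluates concrete pairs:

    shouldScheduleAdjacent(TII, ST, /*FirstMI=*/nullptr, AESMC); // wildcard probe: true
    shouldScheduleAdjacent(TII, ST, &AESE, AESMC);               // AESE + AESMC: true
    shouldScheduleAdjacent(TII, ST, &SomeAdd, AESMC);            // any other pair: false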
diff --git a/lib/Target/ARM/ARMMacroFusion.h b/lib/Target/ARM/ARMMacroFusion.h
new file mode 100644
index 0000000000000..1e4fc6687eae8
--- /dev/null
+++ b/lib/Target/ARM/ARMMacroFusion.h
@@ -0,0 +1,24 @@
+//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM definition of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createARMMacroFusionDAGMutation());
+/// to ARMPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
+
+} // end namespace llvm
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index f59b075e6dd9a..2350d0c6ef69e 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -255,6 +255,16 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
+ case G_ICMP: {
+ LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+ (void)Ty2;
+ assert(Ty2.getSizeInBits() == 32 && "Unsupported size for G_ICMP");
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
+ break;
+ }
case G_MERGE_VALUES: {
// We only support G_MERGE_VALUES for creating a double precision floating
// point value out of two GPRs.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index af682dd8321cf..d890d0fa777e8 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -285,6 +285,10 @@ protected:
/// HasFPAO - if true, processor does positive address offset computation faster
bool HasFPAO = false;
+ /// HasFuseAES - if true, the processor executes back-to-back AES
+ /// instruction pairs faster.
+ bool HasFuseAES = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@@ -561,6 +565,10 @@ public:
bool hasD16() const { return HasD16; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFuseAES() const { return HasFuseAES; }
+ /// \brief Return true if the CPU supports any kind of instruction fusion.
+ bool hasFusion() const { return hasFuseAES(); }
+
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index c0506cfda6129..eb71e557ec917 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "ARMRegisterBankInfo.h"
#endif
#include "ARMSubtarget.h"
+#include "ARMMacroFusion.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
@@ -394,6 +395,9 @@ public:
createMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
// add DAG Mutations here.
+ const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+ if (ST.hasFusion())
+ DAG->addMutation(createARMMacroFusionDAGMutation());
return DAG;
}
@@ -401,6 +405,9 @@ public:
createPostMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
// add DAG Mutations here.
+ const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+ if (ST.hasFusion())
+ DAG->addMutation(createARMMacroFusionDAGMutation());
return DAG;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 19fba3033bb2b..891b5c60e1fd6 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6860,6 +6860,17 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
bool ARMAsmParser::processInstruction(MCInst &Inst,
const OperandVector &Operands,
MCStreamer &Out) {
+ // Check for the wide qualifier: when it is present, we must avoid
+ // selecting a 16-bit Thumb instruction.
+ bool HasWideQualifier = false;
+ for (auto &Op : Operands) {
+ ARMOperand &ARMOp = static_cast<ARMOperand&>(*Op);
+ if (ARMOp.isToken() && ARMOp.getToken() == ".w") {
+ HasWideQualifier = true;
+ break;
+ }
+ }
+
switch (Inst.getOpcode()) {
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
@@ -6939,8 +6950,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// Select the narrow version if the immediate will fit.
if (Inst.getOperand(1).getImm() > 0 &&
Inst.getOperand(1).getImm() <= 0xff &&
- !(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w"))
+ !HasWideQualifier)
Inst.setOpcode(ARM::tLDRpci);
else
Inst.setOpcode(ARM::t2LDRpci);
@@ -6971,10 +6981,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
else if (Inst.getOpcode() == ARM::t2LDRConstPool)
TmpInst.setOpcode(ARM::t2LDRpci);
const ARMOperand &PoolOperand =
- (static_cast<ARMOperand &>(*Operands[2]).isToken() &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ?
- static_cast<ARMOperand &>(*Operands[4]) :
- static_cast<ARMOperand &>(*Operands[3]);
+ (HasWideQualifier ?
+ static_cast<ARMOperand &>(*Operands[4]) :
+ static_cast<ARMOperand &>(*Operands[3]));
const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm();
// If SubExprVal is a constant we may be able to use a MOV
if (isa<MCConstantExpr>(SubExprVal) &&
@@ -8117,8 +8126,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
- !(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8152,7 +8160,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg()) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
- inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr) &&
+ !HasWideQualifier)
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
@@ -8186,7 +8195,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
bool isNarrow = false;
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
- inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi) &&
+ !HasWideQualifier)
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
@@ -8415,10 +8425,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
!isARMLowRegister(Inst.getOperand(0).getReg()) ||
(Inst.getOperand(2).isImm() &&
(unsigned)Inst.getOperand(2).getImm() > 255) ||
- ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
- (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ Inst.getOperand(5).getReg() != (inITBlock() ? 0 : ARM::CPSR) ||
+ HasWideQualifier)
break;
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
@@ -8447,8 +8455,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
if (!Transform ||
Inst.getOperand(5).getReg() != 0 ||
- (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ HasWideQualifier)
break;
MCInst TmpInst;
TmpInst.setOpcode(ARM::tADDhirr);
@@ -8568,11 +8575,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
(Inst.getOperand(1).isImm() &&
(unsigned)Inst.getOperand(1).getImm() <= 255) &&
- ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
- Inst.getOperand(4).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ Inst.getOperand(4).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
// The operands aren't in the same order for tMOVi8...
MCInst TmpInst;
TmpInst.setOpcode(ARM::tMOVi8);
@@ -8593,8 +8597,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ !HasWideQualifier) {
// The operands aren't the same for tMOV[S]r... (no cc_out)
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
@@ -8616,8 +8619,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == 0 &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Illegal opcode!");
@@ -8716,11 +8718,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg())) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
- ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
- !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
- ".w"))) {
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8756,11 +8755,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(2).getReg())) &&
(Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
- ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
- !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
- ".w"))) {
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 3cde43967568b..cf6827fd6ca19 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -49,6 +49,7 @@ add_llvm_target(ARMCodeGen
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
ARMMachineFunctionInfo.cpp
+ ARMMacroFusion.cpp
ARMRegisterInfo.cpp
ARMOptimizeBarriersPass.cpp
ARMSelectionDAGInfo.cpp
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 716492ea25662..81760f03940ad 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -358,11 +358,27 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
return Value;
}
-unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target, uint64_t Value,
bool IsPCRel, MCContext &Ctx,
bool IsLittleEndian,
bool IsResolved) const {
unsigned Kind = Fixup.getKind();
+
+ // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT
+ // and .word relocations they put the Thumb bit into the addend if possible.
+ // Other relocation types don't want this bit though (branches couldn't encode
+ // it if it *was* present, and no other relocations exist) and it can
+ // interfere with checking valid expressions.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) &&
+ (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 ||
+ Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 ||
+ Kind == ARM::fixup_t2_movt_hi16))
+ Value |= 1;
+ }
+
switch (Kind) {
default:
Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
@@ -505,6 +521,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_arm_thumb_bl: {
+ // FIXME: We get both Thumb1 and Thumb2 in here, so we can only check for
+ // the less strict Thumb2 value.
+ if (!isInt<26>(Value - 4)) {
+ Ctx.reportError(Fixup.getLoc(), "Relocation out of range");
+ return 0;
+ }
+
// The value doesn't encode the low bit (always zero) and is offset by
// four. The 32-bit immediate value is encoded as
// imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
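For the range check above, a standalone sketch of the arithmetic (isIntN mirrors what llvm::isInt<26> is assumed to compute): values in [-2^25, 2^25 - 1] pass, anything else triggers reportError.

    #include <cassert>
    #include <cstdint>

    template <unsigned N> bool isIntN(int64_t x) {
      return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
    }

    int main() {
      assert(isIntN<26>((INT64_C(1) << 25) - 1)); // still encodable
      assert(!isIntN<26>(INT64_C(1) << 25));      // rejected as out of range
    }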
@@ -716,29 +739,11 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
+ const MCValue &Target, bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
const unsigned FixupKind = Fixup.getKind() ;
- // MachO (the only user of "Value") tries to make .o files that look vaguely
- // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit
- // into the addend if possible. Other relocation types don't want this bit
- // though (branches couldn't encode it if it *was* present, and no other
- // relocations exist) and it can interfere with checking valid expressions.
- if (FixupKind == FK_Data_4 ||
- FixupKind == ARM::fixup_arm_movw_lo16 ||
- FixupKind == ARM::fixup_arm_movt_hi16 ||
- FixupKind == ARM::fixup_t2_movw_lo16 ||
- FixupKind == ARM::fixup_t2_movt_hi16) {
- if (Sym) {
- if (Asm.isThumbFunc(Sym))
- Value |= 1;
- }
- }
if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
assert(Sym && "How did we resolve this?");
@@ -747,7 +752,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// If the symbol is out of range, produce a relocation and hope the
// linker can handle it. GNU AS produces an error in this case.
- if (Sym->isExternal() || Value >= 0x400004)
+ if (Sym->isExternal())
IsResolved = false;
}
// Create relocations for unconditional branches to function symbols with
@@ -759,6 +764,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
IsResolved = false;
if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
FixupKind == ARM::fixup_arm_thumb_bl ||
+ FixupKind == ARM::fixup_t2_condbranch ||
FixupKind == ARM::fixup_t2_uncondbranch))
IsResolved = false;
}
@@ -875,22 +881,25 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
}
}
-void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value, bool IsPCRel,
- MCContext &Ctx) const {
+void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true);
+ MCContext &Ctx = Asm.getContext();
+ Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx,
+ IsLittleEndian, true);
if (!Value)
return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FullSizeBytes;
if (!IsLittleEndian) {
FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind());
- assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!");
+ assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!");
assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 2ddedb5d61059..6a0ba2ed41c1a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -40,17 +40,17 @@ public:
/// processFixupValue - Target hook to process the literal value of a fixup
/// if necessary.
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
+ void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, bool &IsResolved) override;
- unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
+ unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, uint64_t Value, bool IsPCRel,
MCContext &Ctx, bool IsLittleEndian,
bool IsResolved) const;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 00505a103e00f..f74fb2e20b5a3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -33,8 +33,8 @@ public:
~ARMWinCOFFObjectWriter() override = default;
- unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection,
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
const MCAsmBackend &MAB) const override;
bool recordRelocation(const MCFixup &) const override;
@@ -42,7 +42,8 @@ public:
} // end anonymous namespace
-unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
+unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
const MCAsmBackend &MAB) const {
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 7d5fb6ca17b98..c6ddd6bdad5e6 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -214,7 +214,12 @@ void BPFDAGToDAGISel::PreprocessISelDAG() {
if (Opcode != ISD::LOAD)
continue;
- unsigned char new_val[8]; // hold up the constant values replacing loads.
+ union {
+ uint8_t c[8];
+ uint16_t s;
+ uint32_t i;
+ uint64_t d;
+ } new_val; // holds the constant values replacing loads.
bool to_replace = false;
SDLoc DL(Node);
const LoadSDNode *LD = cast<LoadSDNode>(Node);
@@ -242,7 +247,7 @@ void BPFDAGToDAGISel::PreprocessISelDAG() {
const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode());
if (GADN && CDN)
to_replace =
- getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val);
+ getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c);
} else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END &&
LDAddrNode->getNumOperands() > 0) {
DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n');
@@ -250,7 +255,7 @@ void BPFDAGToDAGISel::PreprocessISelDAG() {
SDValue OP1 = LDAddrNode->getOperand(0);
if (const GlobalAddressSDNode *GADN =
dyn_cast<GlobalAddressSDNode>(OP1.getNode()))
- to_replace = getConstantFieldValue(GADN, 0, size, new_val);
+ to_replace = getConstantFieldValue(GADN, 0, size, new_val.c);
}
if (!to_replace)
@@ -259,13 +264,13 @@ void BPFDAGToDAGISel::PreprocessISelDAG() {
// replacing the old with a new value
uint64_t val;
if (size == 1)
- val = *(uint8_t *)new_val;
+ val = new_val.c[0];
else if (size == 2)
- val = *(uint16_t *)new_val;
+ val = new_val.s;
else if (size == 4)
- val = *(uint32_t *)new_val;
+ val = new_val.i;
else {
- val = *(uint64_t *)new_val;
+ val = new_val.d;
}
DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
@@ -318,14 +323,17 @@ bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node,
}
// test whether host endianness matches target
- uint8_t test_buf[2];
+ union {
+ uint8_t c[2];
+ uint16_t s;
+ } test_buf;
uint16_t test_val = 0x2345;
if (DL.isLittleEndian())
- support::endian::write16le(test_buf, test_val);
+ support::endian::write16le(test_buf.c, test_val);
else
- support::endian::write16be(test_buf, test_val);
+ support::endian::write16be(test_buf.c, test_val);
- bool endian_match = *(uint16_t *)test_buf == test_val;
+ bool endian_match = test_buf.s == test_val;
for (uint64_t i = Offset, j = 0; i < Offset + Size; i++, j++)
ByteSeq[j] = endian_match ? TmpVal[i] : TmpVal[Offset + Size - 1 - j];
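The same probe, standalone. Writing one union member and reading another is accepted by the major compilers for this purpose, though strictly conforming C++ would use memcpy (a minimal sketch):

    #include <cstdint>

    int main() {
      union { uint8_t c[2]; uint16_t s; } test;
      test.c[0] = 0x45;  // byte layout of 0x2345 on a little-endian host
      test.c[1] = 0x23;
      return test.s == 0x2345 ? 0 : 1;  // 0 on little-endian, 1 on big-endian
    }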
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 80357a63a4e12..15e89fb2a2611 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -27,8 +27,9 @@ public:
: MCAsmBackend(), IsLittleEndian(IsLittleEndian) {}
~BPFAsmBackend() override = default;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -61,9 +62,10 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value, bool IsPCRel,
- MCContext &Ctx) const {
+void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
assert(Value == 0);
} else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 9f8c9ded8127b..734f3c6658d92 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -567,8 +567,19 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
}
llvm_unreachable("Invalid register operand");
}
- if (SO.isImm() || SO.isFPImm())
- return IfTrue ? C2_cmoveit : C2_cmoveif;
+ switch (SO.getType()) {
+ case MachineOperand::MO_Immediate:
+ case MachineOperand::MO_FPImmediate:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ return IfTrue ? C2_cmoveit : C2_cmoveif;
+ default:
+ break;
+ }
llvm_unreachable("Unexpected source operand");
}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 18e49c69b8e36..2b0ceaa66258e 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1051,10 +1051,26 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
+ unsigned FrameSize = MFI.getStackSize();
unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
unsigned AP = HMFI.getStackAlignBasePhysReg();
- unsigned FrameSize = MFI.getStackSize();
+ // It may happen that AP is absent even when HasAlloca && HasExtraAlign
+ // is true. HasExtraAlign may be set because of vector spills, without
+ // aligned locals or aligned outgoing function arguments. Since vector
+ // spills will ultimately be "unaligned", it is safe to use FP as the
+ // base register.
+ // In fact, in such a scenario the stack is actually not required to be
+ // aligned, although it may end up being aligned anyway, since this
+ // particular case is not easily detectable. The alignment will be
+ // unnecessary, but not incorrect.
+ // Unfortunately there is no quick way to verify that the above is
+ // indeed the case (and that it's not a result of an error), so just
+ // assume that a missing AP will be replaced by FP.
+ // (A better fix would be to rematerialize AP from FP and always align
+ // vector spills.)
+ if (AP == 0)
+ AP = FP;
bool UseFP = false, UseAP = false; // Default: use SP (except at -O0).
// Use FP at -O0, except when there are objects with extra alignment.
@@ -2454,9 +2470,44 @@ bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);
auto &HST = MF.getSubtarget<HexagonSubtarget>();
// A fairly simplistic guess as to whether a potential load/store to a
- // stack location could require an extra register. It does not account
- // for store-immediate instructions.
- if (HST.useHVXOps())
- return StackSize > 256;
+ // stack location could require an extra register.
+ if (HST.useHVXOps() && StackSize > 256)
+ return true;
+
+ // Check if the function has store-immediate instructions that access
+ // the stack. Since the offset field is not extendable, if the stack
+ // size exceeds the offset limit (6 bits, shifted), the stores will
+ // require a new base register.
+ bool HasImmStack = false;
+ unsigned MinLS = ~0u; // Log_2 of the memory access size.
+
+ for (const MachineBasicBlock &B : MF) {
+ for (const MachineInstr &MI : B) {
+ unsigned LS = 0;
+ switch (MI.getOpcode()) {
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ case Hexagon::S4_storeiri_io:
+ ++LS;
+ LLVM_FALLTHROUGH;
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ case Hexagon::S4_storeirh_io:
+ ++LS;
+ LLVM_FALLTHROUGH;
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ case Hexagon::S4_storeirb_io:
+ if (MI.getOperand(0).isFI())
+ HasImmStack = true;
+ MinLS = std::min(MinLS, LS);
+ break;
+ }
+ }
+ }
+
+ if (HasImmStack)
+ return !isUInt<6>(StackSize >> MinLS);
+
return false;
}
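To make the limit concrete (isUIntN mirrors what llvm::isUInt<6> is assumed to compute): with only word-sized store-immediates, MinLS is 2, so reachable offsets top out at 63 << 2 == 252 and a 260-byte frame already fails the check.

    #include <cassert>
    #include <cstdint>

    template <unsigned N> bool isUIntN(uint64_t x) {
      return x < (UINT64_C(1) << N);
    }

    int main() {
      unsigned MinLS = 2;                           // only word store-immediates
      assert(isUIntN<6>(UINT64_C(252) >> MinLS));   // 63: still reachable
      assert(!isUIntN<6>(UINT64_C(260) >> MinLS));  // 65: may overflow
    }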
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index 11ac5454f6043..5abbcbba72ddd 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -295,15 +296,12 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
bool Failure = false, CanUp = true, CanDown = true;
- bool Used1 = false, Used2 = false;
for (unsigned X = MinX+1; X < MaxX; X++) {
const DefUseInfo &DU = DUM.lookup(X);
if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
Failure = true;
break;
}
- Used1 |= DU.Uses[SR1];
- Used2 |= DU.Uses[SR2];
if (CanDown && DU.Defs[SR1])
CanDown = false;
if (CanUp && DU.Defs[SR2])
@@ -317,64 +315,52 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
// Prefer "down", since this will move the MUX farther away from the
// predicate definition.
MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
- if (CanDown) {
- // If the MUX is placed "down", we need to make sure that there aren't
- // any kills of the source registers between the two defs.
- if (Used1 || Used2) {
- auto ResetKill = [this] (unsigned Reg, MachineInstr &MI) -> bool {
- if (MachineOperand *Op = MI.findRegisterUseOperand(Reg, true, HRI)) {
- Op->setIsKill(false);
- return true;
- }
- return false;
- };
- bool KilledSR1 = false, KilledSR2 = false;
- for (MachineInstr &MJ : make_range(std::next(It1), It2)) {
- if (SR1)
- KilledSR1 |= ResetKill(SR1, MJ);
- if (SR2)
- KilledSR2 |= ResetKill(SR1, MJ);
- }
- // If any of the source registers were killed in this range, transfer
- // the kills to the source operands: they will me "moved" to the
- // resulting MUX and their parent instructions will be deleted.
- if (KilledSR1) {
- assert(Src1->isReg());
- Src1->setIsKill(true);
- }
- if (KilledSR2) {
- assert(Src2->isReg());
- Src2->setIsKill(true);
- }
- }
- } else {
- // If the MUX is placed "up", it shouldn't kill any source registers
- // that are still used afterwards. We can reset the kill flags directly
- // on the operands, because the source instructions will be erased.
- if (Used1 && Src1->isReg())
- Src1->setIsKill(false);
- if (Used2 && Src2->isReg())
- Src2->setIsKill(false);
- }
ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
}
- for (unsigned I = 0, N = ML.size(); I < N; ++I) {
- MuxInfo &MX = ML[I];
- MachineBasicBlock &B = *MX.At->getParent();
- DebugLoc DL = MX.At->getDebugLoc();
+ for (MuxInfo &MX : ML) {
unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
if (!MxOpc)
continue;
- BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
- .addReg(MX.PredR)
- .add(*MX.SrcT)
- .add(*MX.SrcF);
+ MachineBasicBlock &B = *MX.At->getParent();
+ const DebugLoc &DL = B.findDebugLoc(MX.At);
+ auto NewMux = BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
+ .addReg(MX.PredR)
+ .add(*MX.SrcT)
+ .add(*MX.SrcF);
+ NewMux->clearKillInfo();
B.erase(MX.Def1);
B.erase(MX.Def2);
Changed = true;
}
+ // Fix up kill flags.
+
+ LivePhysRegs LPR(*HRI);
+ LPR.addLiveOuts(B);
+ auto IsLive = [&LPR,this] (unsigned Reg) -> bool {
+ for (MCSubRegIterator S(Reg, HRI, true); S.isValid(); ++S)
+ if (LPR.contains(*S))
+ return true;
+ return false;
+ };
+ for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ // This isn't 100% accurate, but it's safe.
+ // It won't detect (as a kill) a case like this:
+ // r0 = add r0, 1 <-- r0 should be "killed"
+ // ... = r0
+ for (MachineOperand &Op : I->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ assert(Op.getSubReg() == 0 && "Should have physical registers only");
+ bool Live = IsLive(Op.getReg());
+ Op.setIsKill(!Live);
+ }
+ LPR.stepBackward(*I);
+ }
+
return Changed;
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index f43101fa456d5..fec2dc5ce3066 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -94,10 +94,6 @@ static cl::opt<bool> UseDFAHazardRec("dfa-hazard-rec",
///
/// Constants for Hexagon instructions.
///
-const int Hexagon_MEMV_OFFSET_MAX_128B = 896; // #s4: -8*128...7*128
-const int Hexagon_MEMV_OFFSET_MIN_128B = -1024; // #s4
-const int Hexagon_MEMV_OFFSET_MAX = 448; // #s4: -8*64...7*64
-const int Hexagon_MEMV_OFFSET_MIN = -512; // #s4
const int Hexagon_MEMW_OFFSET_MAX = 4095;
const int Hexagon_MEMW_OFFSET_MIN = -4096;
const int Hexagon_MEMD_OFFSET_MAX = 8191;
@@ -2443,8 +2439,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::V6_vS32b_ai:
case Hexagon::V6_vL32Ub_ai:
case Hexagon::V6_vS32Ub_ai:
- return (Offset >= Hexagon_MEMV_OFFSET_MIN) &&
- (Offset <= Hexagon_MEMV_OFFSET_MAX);
+ return isShiftedInt<4,6>(Offset);
case Hexagon::PS_vstorerq_ai_128B:
case Hexagon::PS_vstorerw_ai_128B:
@@ -2454,8 +2449,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::V6_vS32b_ai_128B:
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::V6_vS32Ub_ai_128B:
- return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) &&
- (Offset <= Hexagon_MEMV_OFFSET_MAX_128B);
+ return isShiftedInt<4,7>(Offset);
case Hexagon::J2_loop0i:
case Hexagon::J2_loop1i:
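The replacements are equivalent to the deleted constants: isShiftedInt<4,6> accepts multiples of 64 in [-512, 448] (the old Hexagon_MEMV_OFFSET_MIN/MAX) and isShiftedInt<4,7> multiples of 128 in [-1024, 896]. A standalone sketch of the assumed semantics:

    #include <cassert>
    #include <cstdint>

    template <unsigned N, unsigned S> bool isShiftedIntN(int64_t x) {
      // A multiple of 2^S whose quotient fits in a signed N-bit field.
      return (x & ((INT64_C(1) << S) - 1)) == 0 &&
             (x >> S) >= -(INT64_C(1) << (N - 1)) &&
             (x >> S) < (INT64_C(1) << (N - 1));
    }

    int main() {
      assert(isShiftedIntN<4, 6>(-512) && isShiftedIntN<4, 6>(448));
      assert(!isShiftedIntN<4, 6>(449));  // not a multiple of 64
      assert(isShiftedIntN<4, 7>(-1024) && isShiftedIntN<4, 7>(896));
    }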
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index d73fc7c73185d..de6b203015d8e 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -629,7 +629,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
if (MO.isReg() && MO.isUse()) {
unsigned feederReg = MO.getReg();
for (MachineBasicBlock::iterator localII = feederPos,
- end = jmpPos; localII != end; localII++) {
+ end = cmpInstr->getIterator(); localII != end; localII++) {
MachineInstr &localMI = *localII;
for (unsigned j = 0; j < localMI.getNumOperands(); j++) {
MachineOperand &localMO = localMI.getOperand(j);
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index ee3209354688d..7d961a238ae28 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -100,9 +100,6 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
-
- private:
- void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
};
}
@@ -132,7 +129,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
PeepholeDoubleRegsMap.clear();
// Traverse the basic block.
- for (MachineInstr &MI : *MBB) {
+ for (auto I = MBB->begin(), E = MBB->end(), NextI = I; I != E; I = NextI) {
+ NextI = std::next(I);
+ MachineInstr &MI = *I;
// Look for sign extends:
// %vreg170<def> = SXTW %vreg166
if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) {
@@ -280,14 +279,13 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (NewOp) {
unsigned PSrc = MI.getOperand(PR).getReg();
if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
- MI.getOperand(PR).setReg(POrig);
+ BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(),
+ QII->get(NewOp), MI.getOperand(0).getReg())
+ .addReg(POrig)
+ .add(MI.getOperand(S2))
+ .add(MI.getOperand(S1));
MRI->clearKillFlags(POrig);
- MI.setDesc(QII->get(NewOp));
- // Swap operands S1 and S2.
- MachineOperand Op1 = MI.getOperand(S1);
- MachineOperand Op2 = MI.getOperand(S2);
- ChangeOpInto(MI.getOperand(S1), Op2);
- ChangeOpInto(MI.getOperand(S2), Op1);
+ MI.eraseFromParent();
}
} // if (NewOp)
} // if (!Done)
@@ -299,40 +297,6 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
- assert (&Dst != &Src && "Cannot duplicate into itself");
- switch (Dst.getType()) {
- case MachineOperand::MO_Register:
- if (Src.isReg()) {
- Dst.setReg(Src.getReg());
- Dst.setSubReg(Src.getSubReg());
- MRI->clearKillFlags(Src.getReg());
- } else if (Src.isImm()) {
- Dst.ChangeToImmediate(Src.getImm());
- } else {
- llvm_unreachable("Unexpected src operand type");
- }
- break;
-
- case MachineOperand::MO_Immediate:
- if (Src.isImm()) {
- Dst.setImm(Src.getImm());
- } else if (Src.isReg()) {
- Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
- false, Src.isDead(), Src.isUndef(),
- Src.isDebug());
- Dst.setSubReg(Src.getSubReg());
- } else {
- llvm_unreachable("Unexpected src operand type");
- }
- break;
-
- default:
- llvm_unreachable("Unexpected dst operand type");
- break;
- }
-}
-
FunctionPass *llvm::createHexagonPeephole() {
return new HexagonPeephole();
}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 8851a23ae8ace..0aada8a53c979 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -1,4 +1,4 @@
-//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===//
+//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,13 +11,23 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonSubtarget.h"
#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
#include <map>
using namespace llvm;
@@ -119,9 +129,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
: HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering() {
-
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
initializeEnvironment();
// Initialize scheduling itinerary for the specified CPU.
@@ -196,7 +204,6 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
updateLatency(*SrcInst, *DstInst, Dep);
}
-
void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) {
for (auto &SU : DAG->SUnits) {
if (!SU.isInstr())
@@ -240,18 +247,18 @@ void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) {
}
}
-
void HexagonSubtarget::getPostRAMutations(
- std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>());
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ Mutations.push_back(
+ llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>());
}
void HexagonSubtarget::getSMSMutations(
- std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>());
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ Mutations.push_back(
+ llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>());
}
-
// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}
@@ -447,4 +454,3 @@ unsigned HexagonSubtarget::getL1PrefetchDistance() const {
bool HexagonSubtarget::enableSubRegLiveness() const {
return EnableSubregLiveness;
}
-
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 4379efa79c9cd..753dca0000652 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -1,4 +1,4 @@
-//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===//
+//===- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,12 +15,17 @@
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
#include "HexagonFrameLowering.h"
-#include "HexagonISelLowering.h"
#include "HexagonInstrInfo.h"
+#include "HexagonISelLowering.h"
#include "HexagonSelectionDAGInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <memory>
#include <string>
+#include <vector>
#define GET_SUBTARGETINFO_HEADER
#include "HexagonGenSubtargetInfo.inc"
@@ -30,6 +35,12 @@
namespace llvm {
+class MachineInstr;
+class SDep;
+class SUnit;
+class TargetMachine;
+class Triple;
+
class HexagonSubtarget : public HexagonGenSubtargetInfo {
virtual void anchor();
@@ -57,6 +68,7 @@ private:
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
InstrItineraryData InstrItins;
+
void initializeEnvironment();
public:
@@ -108,6 +120,7 @@ public:
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
+
// Always use the TargetLowering default scheduler.
// FIXME: This will use the vliw scheduler which is probably just hurting
// compiler time and will be removed eventually anyway.
@@ -124,6 +137,7 @@ public:
unsigned getSmallDataThreshold() const {
return Hexagon_SMALL_DATA_THRESHOLD;
}
+
const HexagonArchEnum &getHexagonArchVersion() const {
return HexagonArchVersion;
}
@@ -155,4 +169,4 @@ private:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index e507a797871fc..031a1bdefafbf 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -291,7 +291,6 @@ bool HexagonPassConfig::addInstSelector() {
if (EnableBitSimplify)
addPass(createHexagonBitSimplify());
addPass(createHexagonPeephole());
- printAndVerify("After hexagon peephole pass");
// Constant propagation.
if (!DisableHCP) {
addPass(createHexagonConstPropagationPass());
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 545c8b6b2acde..093ce80bc2e3f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -202,10 +202,8 @@ public:
/// processFixupValue - Target hook to adjust the literal value of a fixup
/// if necessary. IsResolved signals whether the caller believes a relocation
/// is needed; the target can modify the value. The default does nothing.
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override {
+ void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, bool &IsResolved) override {
MCFixupKind Kind = Fixup.getKind();
switch((unsigned)Kind) {
@@ -415,9 +413,9 @@ public:
/// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t FixupValue, bool IsPCRel,
- MCContext &Ctx) const override {
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t FixupValue, bool IsPCRel) const override {
// When FixupValue is 0 the relocation is external and there
// is nothing for us to do.
@@ -432,8 +430,8 @@ public:
// to a real offset before we can use it.
uint32_t Offset = Fixup.getOffset();
unsigned NumBytes = getFixupKindNumBytes(Kind);
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
- char *InstAddr = Data + Offset;
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ char *InstAddr = Data.data() + Offset;
Value = adjustFixupValue(Kind, FixupValue);
if(!Value)
@@ -517,7 +515,7 @@ public:
dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) <<
": AValue=0x"; dbgs().write_hex(FixupValue) <<
": Offset=" << Offset <<
- ": Size=" << DataSize <<
+ ": Size=" << Data.size() <<
": OInst=0x"; dbgs().write_hex(OldData) <<
": Reloc=0x"; dbgs().write_hex(Reloc););
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index e8f154a1fa533..c7114c7f18a0a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -701,33 +701,32 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
break;
case Hexagon::A2_addi:
Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
- assert(Absolute);(void)Absolute;
- if (Value == 1) {
- Result.setOpcode(Hexagon::SA1_inc);
- addOps(Result, Inst, 0);
- addOps(Result, Inst, 1);
- break;
- } // 1,2 SUBInst $Rd = add($Rs, #1)
- else if (Value == -1) {
- Result.setOpcode(Hexagon::SA1_dec);
- addOps(Result, Inst, 0);
- addOps(Result, Inst, 1);
- addOps(Result, Inst, 2);
- break;
- } // 1,2 SUBInst $Rd = add($Rs,#-1)
- else if (Inst.getOperand(1).getReg() == Hexagon::R29) {
- Result.setOpcode(Hexagon::SA1_addsp);
- addOps(Result, Inst, 0);
- addOps(Result, Inst, 2);
- break;
- } // 1,3 SUBInst $Rd = add(r29, #$u6_2)
- else {
- Result.setOpcode(Hexagon::SA1_addi);
- addOps(Result, Inst, 0);
- addOps(Result, Inst, 1);
- addOps(Result, Inst, 2);
- break;
- } // 1,2,3 SUBInst $Rx = add($Rx, #$s7)
+ if (Absolute) {
+ if (Value == 1) {
+ Result.setOpcode(Hexagon::SA1_inc);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break;
+ } // 1,2 SUBInst $Rd = add($Rs, #1)
+ if (Value == -1) {
+ Result.setOpcode(Hexagon::SA1_dec);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break;
+ } // 1,2 SUBInst $Rd = add($Rs,#-1)
+ if (Inst.getOperand(1).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::SA1_addsp);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break;
+ } // 1,3 SUBInst $Rd = add(r29, #$u6_2)
+ }
+ Result.setOpcode(Hexagon::SA1_addi);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rx = add($Rx, #$s7)
case Hexagon::A2_add:
Result.setOpcode(Hexagon::SA1_addrx);
addOps(Result, Inst, 0);
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index 0ef1401ef531a..c212726113ab7 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -49,8 +49,9 @@ public:
LanaiAsmBackend(const Target &T, Triple::OSType OST)
: MCAsmBackend(), OSType(OST) {}
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -88,9 +89,10 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-void LanaiAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned /*DataSize*/, uint64_t Value,
- bool /*IsPCRel*/, MCContext & /*Ctx*/) const {
+void LanaiAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool /*IsPCRel*/) const {
MCFixupKind Kind = Fixup.getKind();
Value = adjustFixupValue(static_cast<unsigned>(Kind), Value);
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index d8fdc8ba674e6..982c6fea62d44 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -32,16 +32,20 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
+ const TargetOptions &Options) {
+ return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16";
+}
+
MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, "e-m:e-p:16:16-i32:16:32-a:16-n8:16", TT, CPU, FS,
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(RM), CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
- // FIXME: Check DataLayout string.
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
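An informal gloss of the new layout string, reading it left to right (standard data-layout syntax):

    e          little-endian
    m:e        ELF-style name mangling
    p:16:16    16-bit pointers, 16-bit aligned
    i32:16 i64:16 f32:16 f64:16
               wider scalars carry only 16-bit alignment
    a:8        aggregates aligned to 8 bits
    n8:16      native integer widths of 8 and 16 bits
    S16        16-bit natural stack alignment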
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 694c201cbe8dc..9d5c179a0fd90 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -322,6 +322,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseDirectiveSet();
bool parseDirectiveOption();
bool parseInsnDirective();
+ bool parseRSectionDirective(StringRef Section);
bool parseSSectionDirective(StringRef Section, unsigned Type);
bool parseSetAtDirective();
@@ -5106,7 +5107,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
CC = StringSwitch<unsigned>(Name)
.Case("zero", 0)
- .Case("at", 1)
+ .Cases("at", "AT", 1)
.Case("a0", 4)
.Case("a1", 5)
.Case("a2", 6)
@@ -6952,6 +6953,23 @@ bool MipsAsmParser::parseInsnDirective() {
return false;
}
+/// parseRSectionDirective
+/// ::= .rdata
+bool MipsAsmParser::parseRSectionDirective(StringRef Section) {
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ MCSection *ELFSection = getContext().getELFSection(
+ Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ getParser().getStreamer().SwitchSection(ELFSection);
+
+ getParser().Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
/// parseSSectionDirective
/// ::= .sbss
/// ::= .sdata
@@ -7499,6 +7517,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
parseInsnDirective();
return false;
}
+ if (IDVal == ".rdata") {
+ parseRSectionDirective(".rodata");
+ return false;
+ }
if (IDVal == ".sbss") {
parseSSectionDirective(IDVal, ELF::SHT_NOBITS);
return false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 6d3d4db036032..ae48d6e38fa0f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -235,10 +235,12 @@ static unsigned calculateMMLEIndex(unsigned i) {
/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
-void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value, bool IsPCRel,
- MCContext &Ctx) const {
+void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
MCFixupKind Kind = Fixup.getKind();
+ MCContext &Ctx = Asm.getContext();
Value = adjustFixupValue(Fixup, Value, Ctx);
if (!Value)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 4b3cc6e21f4cd..bf3b290b7ed53 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -38,8 +38,9 @@ public:
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 3272319ad50f4..7daea163b8a64 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -326,9 +326,9 @@ class AUIPC_DESC : ALUIPC_DESC_BASE<"auipc", GPR32Opnd, II_AUIPC>;
class AUI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
InstrItinClass itin = NoItinerary>
: MipsR6Arch<instr_asm> {
- dag OutOperandList = (outs GPROpnd:$rs);
- dag InOperandList = (ins GPROpnd:$rt, uimm16:$imm);
- string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm");
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm");
list<dag> Pattern = [];
InstrItinClass Itinerary = itin;
}
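
[editor note] The operand swap above matches the MIPSR6 manual, where AUI is written `aui $rt, $rs, imm` and computes rt = rs + (imm << 16); the old TableGen description had source and destination reversed. A tiny model of the corrected semantics (plain C++, not compiler code):

#include <cassert>
#include <cstdint>

// aui $rt, $rs, imm  ==>  rt = rs + (imm << 16), wrapping modulo 2^32
static uint32_t aui(uint32_t rs, uint16_t imm) {
  return rs + (uint32_t(imm) << 16);
}

int main() { assert(aui(0x12345678u, 1) == 0x12355678u); }
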
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 68708dc4f50fe..02102d6b22f4e 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -907,6 +907,11 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
if (!(CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1))))
return SDValue();
}
+ // Don't generate INS if constant OR operand doesn't fit into bits
+ // cleared by constant AND operand.
+ if (CN->getSExtValue() & CN1->getSExtValue())
+ return SDValue();
+
SDLoc DL(N);
EVT ValTy = N->getOperand(0)->getValueType(0);
SDValue Const1;
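
[editor note] The guard added above encodes a simple bit condition: (or (and x, AndC), OrC) can only be rewritten as an INS (insert-bit-field) when OrC sets bits exclusively inside the field that AndC clears. A self-contained restatement:

#include <cassert>
#include <cstdint>

// If the two constants share any set bit, INS would clobber bits the AND
// intended to keep, so the combine must bail out.
static bool canUseIns(uint64_t AndC, uint64_t OrC) {
  return (AndC & OrC) == 0;
}

int main() {
  assert(canUseIns(0xffff0000u, 0x1234u));   // OrC fits the cleared low half
  assert(!canUseIns(0xffff0000u, 0x50000u)); // overlaps kept bits: bail
}
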
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 272595af5f6f1..b95f1158fa562 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -274,8 +274,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
if (IsPIC) {
MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(FallThroughMBB, BalTgtMBB);
- LongBrMBB->addSuccessor(BalTgtMBB, BranchProbability::getOne());
- BalTgtMBB->addSuccessor(&*FallThroughMBB, BranchProbability::getOne());
+ LongBrMBB->addSuccessor(BalTgtMBB);
+ BalTgtMBB->addSuccessor(TgtMBB);
// We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal
// instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an
@@ -342,8 +342,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP).addImm(8);
if (Subtarget.hasMips32r6())
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR), Mips::ZERO)
- .addReg(Mips::AT);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR))
+ .addReg(Mips::ZERO).addReg(Mips::AT);
else
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
@@ -415,8 +415,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64).addImm(0);
if (Subtarget.hasMips64r6())
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64), Mips::ZERO_64)
- .addReg(Mips::AT_64);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64))
+ .addReg(Mips::ZERO_64).addReg(Mips::AT_64);
else
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 2382ea2716612..b57bceb3c8371 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1257,19 +1257,22 @@ static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
EVT ResVecTy = Op->getValueType(0);
EVT ViaVecTy = ResVecTy;
+ bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
SDLoc DL(Op);
// When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
// LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
// lanes.
- SDValue LaneA;
- SDValue LaneB = Op->getOperand(2);
+ SDValue LaneA = Op->getOperand(OpNr);
+ SDValue LaneB;
if (ResVecTy == MVT::v2i64) {
- LaneA = DAG.getConstant(0, DL, MVT::i32);
+ LaneB = DAG.getConstant(0, DL, MVT::i32);
ViaVecTy = MVT::v4i32;
+ if (BigEndian)

+ std::swap(LaneA, LaneB);
} else
- LaneA = LaneB;
+ LaneB = LaneA;
SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
@@ -1277,8 +1280,11 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
SDValue Result = DAG.getBuildVector(
ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
- if (ViaVecTy != ResVecTy)
- Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
+ if (ViaVecTy != ResVecTy) {
+ SDValue One = DAG.getConstant(1, DL, ViaVecTy);
+ Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
+ DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
+ }
return Result;
}
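
[editor note] The endianness fix above is easiest to see at the lane level: a zero-extended 32-bit splat into v2i64 is built as a v4i32 BUILD_VECTOR of {value, 0, value, 0}, and which 32-bit slot is the low word of each doubleword flips between little- and big-endian. A sketch of that layout decision:

#include <array>
#include <cstdint>
#include <utility>

static std::array<uint32_t, 4> splatZExtLanes(uint32_t Value, bool BigEndian) {
  uint32_t A = Value, B = 0; // LaneA carries the payload, LaneB the zero bits
  if (BigEndian)
    std::swap(A, B); // on BE the first word of each doubleword is the high word
  return {A, B, A, B};
}
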
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 028c2cb562f8e..6d7eb786a6835 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -113,8 +113,9 @@ public:
return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
}
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override {
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -130,10 +131,8 @@ public:
}
}
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override {
+ void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, bool &IsResolved) override {
switch ((PPC::Fixups)Fixup.getKind()) {
default: break;
case PPC::fixup_ppc_br24:
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 38ae62b26757a..07c9c1f9f84c0 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -24,7 +24,6 @@ namespace llvm {
class PPCTargetMachine;
class PassRegistry;
class FunctionPass;
- class ImmutablePass;
class MachineInstr;
class AsmPrinter;
class MCInst;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 57a1d373c88cf..c2c115cb6dafa 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -521,7 +521,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
- unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
+ unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 662550f7a396a..72f14e9691382 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2560,8 +2560,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_LO);
- SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
+ SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
+ : DAG.getRegister(PPC::R2, MVT::i32);
+
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
@@ -8377,9 +8378,9 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (IntrinsicID == Intrinsic::thread_pointer) {
// Reads the thread pointer register, used for __builtin_thread_pointer.
- bool is64bit = Subtarget.isPPC64();
- return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
+ if (Subtarget.isPPC64())
+ return DAG.getRegister(PPC::X13, MVT::i64);
+ return DAG.getRegister(PPC::R2, MVT::i32);
}
// If this is a lowered altivec predicate compare, CompareOpc is set to the
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 70536a6039b82..e2af5e5295445 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -972,13 +972,15 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
+let isReMaterializable = 1 in {
def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDIStocHA", []>, isPPC64;
+def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDItocL", []>, isPPC64;
+}
let mayLoad = 1 in
def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
-def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDItocL", []>, isPPC64;
}
// Support for thread-local storage.
@@ -994,7 +996,7 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
-let isBarrier = 1, isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
+let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 236e513bec231..13b4f9ab962da 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -292,6 +292,29 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return 0;
}
+// For opcodes with the ReMaterializable flag set, this function is called to
+// verify the instruction is really trivially rematerializable.
+bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AliasAnalysis *AA) const {
+ switch (MI.getOpcode()) {
+ default:
+ // This function should only be called for opcodes with the ReMaterializable
+ // flag set.
+ llvm_unreachable("Unknown rematerializable operation!");
+ break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::LIS:
+ case PPC::LIS8:
+ case PPC::QVGPCI:
+ case PPC::ADDIStocHA:
+ case PPC::ADDItocL:
+ case PPC::LOAD_STACK_GUARD:
+ return true;
+ }
+ return false;
+}
+
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
// Note: This list must be kept consistent with StoreRegToStackSlot.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 8dd4dbb608794..b0629c88cf57b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -162,6 +162,8 @@ public:
unsigned &SubIdx) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
+ AliasAnalysis *AA) const override;
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 637e52bbdbeec..8af7f7e981171 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -389,9 +389,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
- .addReg(PPC::R31)
- .addImm(FrameSize);
+ if (LP64)
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), Reg)
+ .addReg(PPC::X31)
+ .addImm(FrameSize);
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
} else if (LP64) {
BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
.addImm(0)
@@ -478,8 +483,10 @@ void PPCRegisterInfo::lowerDynamicAreaOffset(
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
+ bool is64Bit = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
- BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg())
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI),
+ MI.getOperand(0).getReg())
.addImm(maxCallFrameSize);
MBB.erase(II);
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 5a226b23ff96f..a88a6541e8d00 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -86,9 +86,9 @@ EnableMachineCombinerPass("ppc-machine-combiner",
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
- RegisterTargetMachine<PPC32TargetMachine> A(getThePPC32Target());
- RegisterTargetMachine<PPC64TargetMachine> B(getThePPC64Target());
- RegisterTargetMachine<PPC64TargetMachine> C(getThePPC64LETarget());
+ RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
+ RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
+ RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
@@ -177,32 +177,34 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
assert(Options.MCOptions.getABIName().empty() &&
"Unknown target-abi option!");
- if (!TT.isMacOSX()) {
- switch (TT.getArch()) {
- case Triple::ppc64le:
- return PPCTargetMachine::PPC_ABI_ELFv2;
- case Triple::ppc64:
- return PPCTargetMachine::PPC_ABI_ELFv1;
- default:
- // Fallthrough.
- ;
- }
+ if (TT.isMacOSX())
+ return PPCTargetMachine::PPC_ABI_UNKNOWN;
+
+ switch (TT.getArch()) {
+ case Triple::ppc64le:
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+ case Triple::ppc64:
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ default:
+ return PPCTargetMachine::PPC_ABI_UNKNOWN;
}
- return PPCTargetMachine::PPC_ABI_UNKNOWN;
}
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- if (!RM.hasValue()) {
- if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
- if (!TT.isOSBinFormatMachO() && !TT.isMacOSX())
- return Reloc::PIC_;
- }
- if (TT.isOSDarwin())
- return Reloc::DynamicNoPIC;
- return Reloc::Static;
- }
- return *RM;
+ if (RM.hasValue())
+ return *RM;
+
+ // Darwin defaults to dynamic-no-pic.
+ if (TT.isOSDarwin())
+ return Reloc::DynamicNoPIC;
+
+ // Non-darwin 64-bit platforms are PIC by default.
+ if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)
+ return Reloc::PIC_;
+
+ // 32-bit is static by default.
+ return Reloc::Static;
}
// The FeatureString here is a little subtle. We are modifying the feature
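
[editor note] Reordered, the default relocation-model policy above reads as a simple decision ladder: an explicit -relocation-model choice always wins, then Darwin, then 64-bit ELF. A standalone restatement (illustrative names, not the LLVM API):

#include <cassert>

enum class Model { Static, PIC, DynamicNoPIC };

static Model defaultRelocModel(bool IsDarwin, bool IsPPC64) {
  if (IsDarwin)
    return Model::DynamicNoPIC; // Darwin defaults to dynamic-no-pic
  if (IsPPC64)
    return Model::PIC;          // non-Darwin 64-bit defaults to PIC
  return Model::Static;         // 32-bit defaults to static
}

int main() {
  assert(defaultRelocModel(false, true) == Model::PIC);
  assert(defaultRelocModel(true, false) == Model::DynamicNoPIC);
}
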
@@ -224,26 +226,6 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
PPCTargetMachine::~PPCTargetMachine() = default;
-void PPC32TargetMachine::anchor() {}
-
-PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
-
-void PPC64TargetMachine::anchor() {}
-
-PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
-
const PPCSubtarget *
PPCTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -406,7 +388,7 @@ void PPCPassConfig::addPreRegAlloc() {
// FIXME: We probably don't need to run these for -fPIE.
if (getPPCTargetMachine().isPositionIndependent()) {
// FIXME: LiveVariables should not be necessary here!
- // PPCTLSDYnamicCallPass uses LiveIntervals which previously dependet on
+ // PPCTLSDynamicCallPass uses LiveIntervals which previously depended on
// LiveVariables. This (unnecessary) dependency has been removed now,
// however a stage-2 clang build fails without LiveVariables computed here.
addPass(&LiveVariablesID, false);
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index b8f5a2083d808..5eb6ba785d1b8 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -23,7 +23,7 @@ namespace llvm {
/// Common code between 32-bit and 64-bit PowerPC targets.
///
-class PPCTargetMachine : public LLVMTargetMachine {
+class PPCTargetMachine final : public LLVMTargetMachine {
public:
enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 };
private:
@@ -60,29 +60,6 @@ public:
return false;
}
};
-
-/// PowerPC 32-bit target machine.
-///
-class PPC32TargetMachine : public PPCTargetMachine {
- virtual void anchor();
-public:
- PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
-/// PowerPC 64-bit target machine.
-///
-class PPC64TargetMachine : public PPCTargetMachine {
- virtual void anchor();
-public:
- PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
} // end namespace llvm
#endif
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index d9a71893afee7..f85c0cf111c43 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -32,8 +32,9 @@ public:
: MCAsmBackend(), OSABI(OSABI), Is64Bit(Is64Bit) {}
~RISCVAsmBackend() override {}
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -69,9 +70,10 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-void RISCVAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
return;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index d1d1334163a26..c72b47b090857 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -203,10 +203,8 @@ namespace {
return InfosBE[Kind - FirstTargetFixupKind];
}
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override {
+ void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, bool &IsResolved) override {
switch ((Sparc::Fixups)Fixup.getKind()) {
default: break;
case Sparc::fixup_sparc_wplt30:
@@ -273,9 +271,9 @@ namespace {
ELFSparcAsmBackend(const Target &T, Triple::OSType OSType) :
SparcAsmBackend(T), OSType(OSType) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel,
- MCContext &Ctx) const override {
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp
index 627e49a95f3cc..2c040dce994b6 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -15,6 +15,12 @@
using namespace llvm;
+void SparcELFTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.h b/lib/Target/Sparc/SparcTargetObjectFile.h
index fe8800625a567..3b1b345c3b193 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -23,6 +23,8 @@ public:
TargetLoweringObjectFileELF()
{}
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding,
const TargetMachine &TM,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index fd1fd7bc40dcc..6b32a7926437a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -50,8 +50,9 @@ public:
return SystemZ::NumTargetFixupKinds;
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override {
return false;
}
@@ -89,15 +90,17 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
return Infos[Kind - FirstTargetFixupKind];
}
-void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
MCFixupKind Kind = Fixup.getKind();
unsigned Offset = Fixup.getOffset();
unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
unsigned Size = (BitSize + 7) / 8;
- assert(Offset + Size <= DataSize && "Invalid fixup offset!");
+ assert(Offset + Size <= Data.size() && "Invalid fixup offset!");
// Big-endian insertion of Size bytes.
Value = extractBitsForFixup(Kind, Value);
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index a28a91e834f61..0cb2b5a14ce73 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -277,8 +277,21 @@ void SystemZFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- uint64_t MaxReach = (MFFrame.estimateStackSize(MF) +
- SystemZMC::CallFrameSize * 2);
+ // Get the size of our stack frame to be allocated ...
+ uint64_t StackSize = (MFFrame.estimateStackSize(MF) +
+ SystemZMC::CallFrameSize);
+ // ... and the maximum offset we may need to reach into the
+ // caller's frame to access the save area or stack arguments.
+ int64_t MaxArgOffset = SystemZMC::CallFrameSize;
+ for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I)
+ if (MFFrame.getObjectOffset(I) >= 0) {
+ int64_t ArgOffset = SystemZMC::CallFrameSize +
+ MFFrame.getObjectOffset(I) +
+ MFFrame.getObjectSize(I);
+ MaxArgOffset = std::max(MaxArgOffset, ArgOffset);
+ }
+
+ uint64_t MaxReach = StackSize + MaxArgOffset;
if (!isUInt<12>(MaxReach)) {
// We may need register scavenging slots if some parts of the frame
// are outside the reach of an unsigned 12-bit displacement.
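
[editor note] Numerically, the new bound adds the reach into the caller's frame on top of the local frame instead of assuming one extra CallFrameSize always suffices. A sketch with SystemZ's 160-byte call frame (value per the ELF ABI; treat it as an assumption here):

#include <algorithm>
#include <cstdint>

static bool needsScavengingSlots(uint64_t EstimatedStackSize,
                                 int64_t MaxIncomingArgEnd) {
  const int64_t CallFrameSize = 160; // SystemZ register save area
  uint64_t StackSize = EstimatedStackSize + CallFrameSize;
  int64_t MaxArgOffset = std::max(CallFrameSize, MaxIncomingArgEnd);
  uint64_t MaxReach = StackSize + uint64_t(MaxArgOffset);
  return MaxReach >= 4096; // !isUInt<12>: beyond a 12-bit displacement
}
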
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index ac4c3f6db684d..fef4a8c92a362 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1322,11 +1322,6 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}
-SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad(
- SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const {
- return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
-}
-
// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument. Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
@@ -2059,6 +2054,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
if (NewC.ICmpType != SystemZICMP::SignedOnly &&
NewC.Op0.getOpcode() == ISD::SHL &&
isSimpleShift(NewC.Op0, ShiftVal) &&
+ (MaskVal >> ShiftVal != 0) &&
(NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
MaskVal >> ShiftVal,
CmpVal >> ShiftVal,
@@ -2068,6 +2064,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
} else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
NewC.Op0.getOpcode() == ISD::SRL &&
isSimpleShift(NewC.Op0, ShiftVal) &&
+ (MaskVal << ShiftVal != 0) &&
(NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
MaskVal << ShiftVal,
CmpVal << ShiftVal,
@@ -3212,12 +3209,15 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
-// Op is an atomic load. Lower it into a normal volatile load.
+// Op is an atomic load. Lower it into a serialization followed
+// by a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
+ SDValue Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
+ MVT::Other, Node->getChain()), 0);
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
- Node->getChain(), Node->getBasePtr(),
+ Chain, Node->getBasePtr(),
Node->getMemoryVT(), Node->getMemOperand());
}
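
[editor note] At the source level this path covers ordinary atomic loads; the lowering now glues the serialization in front of the load explicitly instead of going through a SERIALIZE DAG node. A sketch of code that exercises it (the emitted sequence is an assumption about this subtarget, registers illustrative):

#include <atomic>

int loadAtomic(std::atomic<int> &A) {
  // Expected SystemZ lowering:
  //   bcr 14,0        ; serialize
  //   l   %r2,0(%r3)  ; ordinary (volatile) load
  return A.load(std::memory_order_seq_cst);
}
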
@@ -4688,7 +4688,6 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(STRCMP);
OPCODE(SEARCH_STRING);
OPCODE(IPM);
- OPCODE(SERIALIZE);
OPCODE(MEMBARRIER);
OPCODE(TBEGIN);
OPCODE(TBEGIN_NOFLOAT);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 79c8c4d92669f..5dcb19c0a35db 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -139,9 +139,6 @@ enum NodeType : unsigned {
// Store the CC value in bits 29 and 28 of an integer.
IPM,
- // Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
- SERIALIZE,
-
// Compiler barrier only; generate a no-op.
MEMBARRIER,
@@ -471,8 +468,6 @@ public:
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
- SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
- SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
ISD::NodeType getExtendForAtomicOps() const override {
@@ -522,7 +517,6 @@ private:
unsigned Opcode) const;
SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index fa5ecdd852433..9f5e6288348e0 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -189,18 +189,15 @@ let isBranch = 1, isTerminator = 1 in {
//===----------------------------------------------------------------------===//
// Unconditional trap.
-// FIXME: This trap instruction should be marked as isTerminator, but there is
-// currently a general bug that allows non-terminators to be placed between
-// terminators. Temporarily leave this unmarked until the bug is fixed.
-let isBarrier = 1, hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
def Trap : Alias<4, (outs), (ins), [(trap)]>;
// Conditional trap.
-let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in
+let hasCtrlDep = 1, Uses = [CC] in
def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
// Fused compare-and-trap instructions.
-let isTerminator = 1, hasCtrlDep = 1 in {
+let hasCtrlDep = 1 in {
// These patterns work the same way as for compare-and-branch.
defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>;
defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>;
@@ -1449,7 +1446,7 @@ let Predicates = [FeatureExecutionHint] in {
// A serialization instruction that acts as a barrier for all memory
// accesses, which expands to "bcr 14, 0".
let hasSideEffects = 1 in
-def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
+def Serialize : Alias<2, (outs), (ins), []>;
// A pseudo instruction that serves as a compiler barrier.
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index ab6020f3f1896..b6feaa49d8585 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
#ifndef NDEBUG
// Print the set of SUs
void SystemZPostRASchedStrategy::SUSet::
-dump(SystemZHazardRecognizer &HazardRec) {
+dump(SystemZHazardRecognizer &HazardRec) const {
dbgs() << "{";
for (auto &SU : *this) {
HazardRec.dumpSU(SU, dbgs());
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index 12357e0348a9e..3dfef388691e7 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -72,7 +72,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
// A set of SUs with a sorter and dump method.
struct SUSet : std::set<SUnit*, SUSorter> {
#ifndef NDEBUG
- void dump(SystemZHazardRecognizer &HazardRec);
+ void dump(SystemZHazardRecognizer &HazardRec) const;
#endif
};
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index adfc69c5d4cf4..ab2392809f3be 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -191,8 +191,6 @@ def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
-def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
- [SDNPHasChain, SDNPMayStore]>;
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index eb2f17a2091c3..a10ca64fa6329 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -51,8 +51,6 @@ public:
}
bool targetSchedulesPostRAScheduling() const override { return true; };
-
- bool isMachineVerifierClean() const override { return false; }
};
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 4f20096c15830..1357cb5735f8a 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -37,8 +37,9 @@ public:
: MCAsmBackend(), Is64Bit(Is64Bit) {}
~WebAssemblyAsmBackendELF() override {}
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -77,8 +78,9 @@ public:
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -105,9 +107,11 @@ bool WebAssemblyAsmBackendELF::writeNopData(uint64_t Count,
return true;
}
-void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void WebAssemblyAsmBackendELF::applyFixup(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const {
const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags");
@@ -119,7 +123,7 @@ void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data,
Value <<= Info.TargetOffset;
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
@@ -163,9 +167,11 @@ bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
return true;
}
-void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel, MCContext &Ctx) const {
+void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const {
const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags");
@@ -177,7 +183,7 @@ void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
Value <<= Info.TargetOffset;
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index c56c591def361..3e3b52fca5691 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -36,7 +36,6 @@ STATISTIC(MCNumFixups, "Number of MC fixups created.");
namespace {
class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCInstrInfo &MCII;
- MCContext &Ctx;
// Implementation generated by tablegen.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -48,14 +47,12 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCSubtargetInfo &STI) const override;
public:
- WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), Ctx(ctx) {}
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
};
} // end anonymous namespace
-MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
- MCContext &Ctx) {
- return new WebAssemblyMCCodeEmitter(MCII, Ctx);
+MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) {
+ return new WebAssemblyMCCodeEmitter(MCII);
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
@@ -89,11 +86,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
} else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
encodeSLEB128(int64_t(MO.getImm()), OS);
} else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
- Fixups.push_back(MCFixup::create(
- OS.tell() - Start, MCConstantExpr::create(MO.getImm(), Ctx),
- MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc()));
- ++MCNumFixups;
- encodeULEB128(uint64_t(MO.getImm()), OS);
+ llvm_unreachable("wasm globals should only be accessed symbolicly");
} else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) {
encodeSLEB128(int64_t(MO.getImm()), OS);
} else {
@@ -135,6 +128,9 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
PaddedSize = 5;
+ } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
+ FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index);
+ PaddedSize = 5;
} else {
llvm_unreachable("unexpected symbolic operand kind");
}
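
[editor note] Globals are now emitted as relocatable references: the immediate is dropped from the code stream and a fixup over a fixed-width, padded LEB128 slot is recorded instead (PaddedSize = 5 above). A standalone sketch of what a 5-byte padded ULEB128 slot looks like:

#include <cstdint>
#include <vector>

// Redundant continuation bits keep the field exactly 5 bytes wide, so the
// object writer can patch the final index in place without resizing code.
static std::vector<uint8_t> encodePaddedULEB128(uint32_t Value) {
  std::vector<uint8_t> Out;
  for (int I = 0; I != 5; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I != 4)
      Byte |= 0x80;
    Out.push_back(Byte);
  }
  return Out;
}
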
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 9fd3ec81c258f..9580eeaa33d73 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -74,7 +74,7 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/,
static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo & /*MRI*/,
MCContext &Ctx) {
- return createWebAssemblyMCCodeEmitter(MCII, Ctx);
+ return createWebAssemblyMCCodeEmitter(MCII);
}
static MCAsmBackend *createAsmBackend(const Target & /*T*/,
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 0ba700a86b744..4d676c32a09c5 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -35,8 +35,7 @@ class raw_pwrite_stream;
Target &getTheWebAssemblyTarget32();
Target &getTheWebAssemblyTarget64();
-MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
- MCContext &Ctx);
+MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 19e14f3261aa7..9cf77829f3bc2 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -68,6 +68,8 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
bool IsFunction = IsFunctionExpr(Fixup.getValue());
switch (unsigned(Fixup.getKind())) {
+ case WebAssembly::fixup_code_global_index:
+ return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
case WebAssembly::fixup_code_sleb128_i32:
if (IsFunction)
return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index b999091e2d294..f51585a10ca12 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -96,13 +96,6 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
MCConstantExpr::create(Size, OutContext));
}
}
-
- if (!TM.getTargetTriple().isOSBinFormatELF()) {
- MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>();
- getTargetStreamer()->emitGlobal(MMIW.getGlobals());
- if (MMIW.hasStackPointerGlobal())
- getTargetStreamer()->emitStackPointer(MMIW.getStackPointerGlobal());
- }
}
void WebAssemblyAsmPrinter::EmitConstantPool() {
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 09338a4898e03..c980f4b87f916 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -63,12 +63,16 @@ class WebAssemblyFastISel final : public FastISel {
public:
// Innocuous defaults for our address.
Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; }
- void setKind(BaseKind K) { Kind = K; }
+ void setKind(BaseKind K) {
+ assert(!isSet() && "Can't change kind with non-zero base");
+ Kind = K;
+ }
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
+ assert(Base.Reg == 0 && "Overwriting non-zero register");
Base.Reg = Reg;
}
unsigned getReg() const {
@@ -77,6 +81,7 @@ class WebAssemblyFastISel final : public FastISel {
}
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
+ assert(Base.FI == 0 && "Overwriting non-zero frame index");
Base.FI = FI;
}
unsigned getFI() const {
@@ -91,6 +96,13 @@ class WebAssemblyFastISel final : public FastISel {
int64_t getOffset() const { return Offset; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
const GlobalValue *getGlobalValue() const { return GV; }
+ bool isSet() const {
+ if (isRegBase()) {
+ return Base.Reg != 0;
+ } else {
+ return Base.FI != 0;
+ }
+ }
};
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
@@ -297,6 +309,9 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
+ if (Addr.isSet()) {
+ return false;
+ }
Addr.setKind(Address::FrameIndexBase);
Addr.setFI(SI->second);
return true;
@@ -341,6 +356,9 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
break;
}
}
+ if (Addr.isSet()) {
+ return false;
+ }
Addr.setReg(getRegForValue(Obj));
return Addr.getReg() != 0;
}
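
[editor note] The new asserts and isSet() guard defend against folding two different bases into one address: once a base register or frame index is chosen, a second candidate must make the whole fold fail rather than silently overwrite the first. A reduced model of that rule:

#include <cassert>

struct AddrSketch {
  unsigned BaseReg = 0;
  bool isSet() const { return BaseReg != 0; }
  void setReg(unsigned R) {
    assert(BaseReg == 0 && "Overwriting non-zero register");
    BaseReg = R;
  }
};

static bool foldBase(AddrSketch &Addr, unsigned NewBase) {
  if (Addr.isSet())
    return false; // second base: give up, let normal ISel handle it
  Addr.setReg(NewBase);
  return true;
}
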
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 4209bc333f230..a37d6136e44ed 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -104,10 +104,10 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
const DebugLoc &DL) {
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const char *ES = "__stack_pointer";
+ auto *SPSymbol = MF.createExternalSymbolName(ES);
if (MF.getSubtarget<WebAssemblySubtarget>()
.getTargetTriple().isOSBinFormatELF()) {
- const char *ES = "__stack_pointer";
- auto *SPSymbol = MF.createExternalSymbolName(ES);
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
@@ -125,10 +125,8 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
.addReg(SrcReg)
.addMemOperand(MMO);
} else {
- MachineModuleInfoWasm &MMIW =
- MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32))
- .addImm(MMIW.getStackPointerGlobal())
+ .addExternalSymbol(SPSymbol)
.addReg(SrcReg);
}
}
@@ -171,10 +169,11 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
unsigned SPReg = WebAssembly::SP32;
if (StackSize)
SPReg = MRI.createVirtualRegister(PtrRC);
+
+ const char *ES = "__stack_pointer";
+ auto *SPSymbol = MF.createExternalSymbolName(ES);
if (MF.getSubtarget<WebAssemblySubtarget>()
.getTargetTriple().isOSBinFormatELF()) {
- const char *ES = "__stack_pointer";
- auto *SPSymbol = MF.createExternalSymbolName(ES);
unsigned Zero = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
@@ -189,22 +188,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(Zero) // addr
.addMemOperand(LoadMMO);
} else {
- auto &MMIW = MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
- if (!MMIW.hasStackPointerGlobal()) {
- MMIW.setStackPointerGlobal(MMIW.getGlobals().size());
-
- // Create the stack-pointer global. For now, just use the
- // Emscripten/Binaryen ABI names.
- wasm::Global G;
- G.Type = wasm::ValType::I32;
- G.Mutable = true;
- G.InitialValue = 0;
- G.InitialModule = "env";
- G.InitialName = "STACKTOP";
- MMIW.addGlobal(G);
- }
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg)
- .addImm(MMIW.getStackPointerGlobal());
+ .addExternalSymbol(SPSymbol);
}
bool HasBP = hasBP(MF);
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 6650191807dcb..ea9e3fa862ce2 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -170,28 +170,16 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
if (MI.mayStore()) {
Write = true;
- const MachineFunction &MF = *MI.getParent()->getParent();
- if (MF.getSubtarget<WebAssemblySubtarget>()
- .getTargetTriple().isOSBinFormatELF()) {
- // Check for stores to __stack_pointer.
- for (auto MMO : MI.memoperands()) {
- const MachinePointerInfo &MPI = MMO->getPointerInfo();
- if (MPI.V.is<const PseudoSourceValue *>()) {
- auto PSV = MPI.V.get<const PseudoSourceValue *>();
- if (const ExternalSymbolPseudoSourceValue *EPSV =
- dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
- if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
- StackPointer = true;
- }
- }
- } else {
- // Check for sets of the stack pointer.
- const MachineModuleInfoWasm &MMIW =
- MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
- if ((MI.getOpcode() == WebAssembly::SET_LOCAL_I32 ||
- MI.getOpcode() == WebAssembly::SET_LOCAL_I64) &&
- MI.getOperand(0).getImm() == MMIW.getStackPointerGlobal()) {
- StackPointer = true;
+ // Check for stores to __stack_pointer.
+ for (auto MMO : MI.memoperands()) {
+ const MachinePointerInfo &MPI = MMO->getPointerInfo();
+ if (MPI.V.is<const PseudoSourceValue *>()) {
+ auto PSV = MPI.V.get<const PseudoSourceValue *>();
+ if (const ExternalSymbolPseudoSourceValue *EPSV =
+ dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
+ if (StringRef(EPSV->getSymbol()) == "__stack_pointer") {
+ StackPointer = true;
+ }
}
}
} else if (MI.hasOrderedMemoryRef()) {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e5d3209ec6a97..d30cc724c203f 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1705,8 +1705,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
unsigned Len = DotDispStr.size();
- unsigned Val = OrigDispVal + DotDispVal;
- InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
+ InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, DotDispVal);
}
NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 7a9e4f4468ec7..914fb36f91a7d 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -108,12 +108,12 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override {
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.getOffset() + Size <= DataSize &&
- "Invalid fixup offset!");
+ assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
// Check that upper bits are either all zeros or all ones.
// Specifically ignore overflow/underflow as long as the leakage is
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 4097ef224d503..caf98bffb80de 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -153,8 +153,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
const MCSymbol *B_Base = Asm.getAtom(*B);
// Neither symbol can be modified.
- if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
- Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) {
+ if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None) {
Asm.getContext().reportError(Fixup.getLoc(),
"unsupported relocation of modified symbol");
return;
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 105580c913a16..5892f1de33eec 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
@@ -25,8 +26,8 @@ public:
X86WinCOFFObjectWriter(bool Is64Bit);
~X86WinCOFFObjectWriter() override = default;
- unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection,
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
const MCAsmBackend &MAB) const override;
};
@@ -36,11 +37,19 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
: COFF::IMAGE_FILE_MACHINE_I386) {}
-unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
+unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
const MCAsmBackend &MAB) const {
- unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind();
+ unsigned FixupKind = Fixup.getKind();
+ if (IsCrossSection) {
+ if (FixupKind != FK_Data_4) {
+ Ctx.reportError(Fixup.getLoc(), "Cannot represent this expression");
+ return COFF::IMAGE_REL_AMD64_ADDR32;
+ }
+ FixupKind = FK_PCRel_4;
+ }
MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
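
[editor note] The behavior change above: previously any cross-section fixup was silently retargeted to FK_PCRel_4; now only a 4-byte absolute data fixup may be rewritten that way, and everything else is a reported error instead of a silent miscompile. As a plain decision function (sketch, not the COFF writer API):

#include <cstdint>

enum Kind : uint8_t { FK_Data_1, FK_Data_2, FK_Data_4, FK_PCRel_4 };

// Returns true if the rewrite is legal; false means "Cannot represent this
// expression" should be reported.
static bool rewriteCrossSection(Kind &K) {
  if (K != FK_Data_4)
    return false;
  K = FK_PCRel_4;
  return true;
}
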
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 2777fa89330f6..e3aa227702bea 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -748,17 +748,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
else
CallOp = X86::CALLpcrel32;
- const char *Symbol;
- if (Is64Bit) {
- if (STI.isTargetCygMing()) {
- Symbol = "___chkstk_ms";
- } else {
- Symbol = "__chkstk";
- }
- } else if (STI.isTargetCygMing())
- Symbol = "_alloca";
- else
- Symbol = "_chkstk";
+ StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);
MachineInstrBuilder CI;
MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
@@ -769,10 +759,11 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
// For the large code model, we have to call through a register. Use R11,
// as it is scratch in all supported calling conventions.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
- .addExternalSymbol(Symbol);
+ .addExternalSymbol(MF.createExternalSymbolName(Symbol));
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
} else {
- CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
+ CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(MF.createExternalSymbolName(Symbol));
}
unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
@@ -783,13 +774,16 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
.addReg(SP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- if (Is64Bit) {
+ if (STI.isTargetWin64() || !STI.isOSWindows()) {
+ // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
- // themselves. It also does not clobber %rax so we can reuse it when
+ // themselves. They also do not clobber %rax so we can reuse it when
// adjusting %rsp.
- BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
- .addReg(X86::RSP)
- .addReg(X86::RAX);
+ // All other platforms do not specify a particular ABI for the stack probe
+ // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
+ BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP)
+ .addReg(SP)
+ .addReg(AX);
}
if (InProlog) {
@@ -978,7 +972,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
- bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());
+ bool UseRedZone = false;
+ bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
// The default stack probe size is 4096 if the function has no stackprobesize
// attribute.
@@ -1007,6 +1002,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
!TRI->needsStackRealignment(MF) &&
!MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
+ !UseStackProbe && // No stack probes.
!IsWin64CC && // Win64 has no Red Zone
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
@@ -1015,6 +1011,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
MFI.setStackSize(StackSize);
+ UseRedZone = true;
}
// Insert stack pointer adjustment for later moving of return addr. Only
@@ -1192,6 +1189,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
+ assert(!UseRedZone && "The Red Zone is not accounted for in stack probes");
+
// Check whether EAX is livein for this block.
bool isEAXAlive = isEAXLiveIn(MBB);
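
[editor note] For reference, the open-coded symbol choice deleted above (now centralized in getStackProbeSymbolName) picked the probe routine by target flavor; kept here as a sketch of what the hook returns on Windows targets:

static const char *stackProbeSymbol(bool Is64Bit, bool IsCygMing) {
  if (Is64Bit)
    return IsCygMing ? "___chkstk_ms" : "__chkstk";
  return IsCygMing ? "_alloca" : "_chkstk";
}
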
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2a1633de0a239..3c4589ab18f6f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -204,6 +204,11 @@ namespace {
bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
+ template <class GatherScatterSDNode>
+ bool selectAddrOfGatherScatterNode(GatherScatterSDNode *Parent, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
bool selectMOV64Imm32(SDValue N, SDValue &Imm);
bool selectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -1415,13 +1420,10 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index,
- SDValue &Disp, SDValue &Segment) {
-
- MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
- if (!Mgs)
- return false;
+template <class GatherScatterSDNode>
+bool X86DAGToDAGISel::selectAddrOfGatherScatterNode(
+ GatherScatterSDNode *Mgs, SDValue N, SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
// AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
@@ -1453,6 +1455,18 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
return true;
}
+bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ if (auto Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent))
+ return selectAddrOfGatherScatterNode<MaskedGatherScatterSDNode>(
+ Mgs, N, Base, Scale, Index, Disp, Segment);
+ if (auto X86Gather = dyn_cast<X86MaskedGatherSDNode>(Parent))
+ return selectAddrOfGatherScatterNode<X86MaskedGatherSDNode>(
+ X86Gather, N, Base, Scale, Index, Disp, Segment);
+ return false;
+}
+
/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 172eba0002d4f..f777e56289884 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1662,6 +1662,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
+
+ // TODO: These control memcmp expansion in CGP and are set low to prevent
+ // altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder.
+ MaxLoadsPerMemcmp = 1;
+ MaxLoadsPerMemcmpOptSize = 1;
+
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
@@ -14272,9 +14278,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// If we are inserting an element, see if we can do this more efficiently with
// a blend shuffle with a rematerializable vector than a costly integer
// insertion.
- // TODO: pre-SSE41 targets will tend to use bit masking - this could still
- // be beneficial if we are inserting several zeros and can combine the masks.
- if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) {
+ if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
+ 16 <= EltVT.getSizeInBits()) {
SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
BlendMask.push_back(i == IdxVal ? i + NumElts : i);
@@ -17621,23 +17626,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
-
SDValue CmpOp0 = Cmp.getOperand(0);
+
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
if (isNullConstant(Y) &&
- (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
- SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
- DAG.getConstant(0, DL,
- CmpOp0.getValueType()),
- CmpOp0);
- SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- return Res;
- }
+ (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
+ SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ return Res;
+ }
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
@@ -18648,8 +18651,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
+ bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
- SplitStack;
+ SplitStack || EmitStackProbe;
SDLoc dl(Op);
// Get the inputs.
@@ -23705,6 +23709,57 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SDValue RetOps[] = {Exract, NewGather.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
+ if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) {
+ // There is a special case when the return type v2i32 is illegal and
+ // the type legalizer extended it to v2i64. Without this conversion we end up
+ // with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD.
+ // In order to avoid this situation, we'll build an X86 specific Gather node
+ // with index v2i64 and value type v4i32.
+ assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 &&
+ "Unexpected type in masked gather");
+ Src0 = DAG.getVectorShuffle(MVT::v4i32, dl,
+ DAG.getBitcast(MVT::v4i32, Src0),
+ DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
+ // The mask should match the destination type. Extending the mask with
+ // zeroes is not necessary since the instruction itself reads only two
+ // values from memory.
+ Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
+ N->getMemOperand());
+
+ SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
+ NewGather.getValue(0), DAG);
+ SDValue RetOps[] = { Sext, NewGather.getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+ if (N->getMemoryVT() == MVT::v2f32 && Subtarget.hasVLX()) {
+ // This transformation is for optimization only.
+ // The type legalizer extended the mask and index to 4-element vectors
+ // in order to match the requirements of the common gather node - the
+ // same vector width for index and value. The X86 gather node allows a
+ // width mismatch so that a more optimal instruction can be selected in
+ // the end.
+ assert(VT == MVT::v4f32 && Src0.getValueType() == MVT::v4f32 &&
+ "Unexpected type in masked gather");
+ if (Mask.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) &&
+ Index.getOpcode() == ISD::CONCAT_VECTORS &&
+ Index.getOperand(1).isUndef()) {
+ Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false);
+ Index = Index.getOperand(0);
+ } else
+ return Op;
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(),
+ N->getMemOperand());
+
+ SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+
+ }
return Op;
}
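Both special cases above hinge on the X86 gather reading only the lanes whose mask bit is set, which is why the v2i1 mask can be padded with zeroes to v4i1. A standalone scalar model of that masking behavior (a hand-written sketch; the lane values and zero-padding shown are illustrative assumptions, not LLVM API code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Mem[2] = {7, 9};                   // two v2i32 elements in memory
      bool Mask[4] = {true, true, false, false}; // v2i1 zero-extended to v4i1
      int32_t Dst[4] = {0, 0, 0, 0};             // v4i32 destination lanes
      for (int i = 0; i < 4; ++i)
        if (Mask[i])
          Dst[i] = Mem[i];                       // lanes 2 and 3 never touch memory
      assert(Dst[0] == 7 && Dst[1] == 9 && Dst[2] == 0 && Dst[3] == 0);
      return 0;
    }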
@@ -24508,6 +24563,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
+ case X86ISD::MGATHER: return "X86ISD::MGATHER";
}
return nullptr;
}
@@ -29868,7 +29924,7 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
if (N->getValueType(0) == MVT::i32)
Diff = (unsigned)Diff;
- bool isFastMultiplier = false;
+ bool IsFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default:
@@ -29880,12 +29936,12 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
- isFastMultiplier = true;
+ IsFastMultiplier = true;
break;
}
}
- if (isFastMultiplier) {
+ if (IsFastMultiplier) {
APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
@@ -34841,23 +34897,56 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
- // (cmp Z, 1) sets the carry flag if Z is 0.
SDValue Z = Cmp.getOperand(0);
- SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z,
- DAG.getConstant(1, DL, Z.getValueType()));
+ EVT ZVT = Z.getValueType();
+
+ // If X is -1 or 0, then we have an opportunity to avoid constants required in
+ // the general case below.
+ if (auto *ConstantX = dyn_cast<ConstantSDNode>(X)) {
+ // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
+ // fake operands:
+ // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
+ // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
+ if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
+ (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
+ SDValue Zero = DAG.getConstant(0, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ }
+
+ // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
+ // with fake operands:
+ // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
+ // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
+ if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
+ (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1);
+ }
+ }
- SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ // (cmp Z, 1) sets the carry flag if Z is 0.
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
+
+ // Add the flags type for ADC/SBB nodes.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
// X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
// X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
- DAG.getConstant(-1ULL, DL, VT), NewCmp);
+ DAG.getConstant(-1ULL, DL, VT), Cmp1);
// X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
// X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
- DAG.getConstant(0, DL, VT), NewCmp);
+ DAG.getConstant(0, DL, VT), Cmp1);
}
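The carry-flag reasoning in the comments above can be checked in isolation: "neg Z" sets CF exactly when Z != 0, and "sbb r, r" materializes -CF. A minimal scalar model of the two folded select patterns (hand-written sketch, not LLVM API code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t Z : {0, 1, -5}) {
        int32_t CF = (Z != 0);       // carry out of "neg Z"
        int32_t Sbb = -CF;           // "sbb %eax, %eax" materializes -CF
        assert(Sbb == 0 - (Z != 0)); // 0 - (Z != 0) pattern
        assert(Sbb == -1 + (Z == 0)); // -1 + (Z == 0) pattern
      }
      return 0;
    }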
static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
@@ -34976,6 +35065,32 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
}
+/// Convert vector increment or decrement to sub/add with an all-ones constant:
+/// add X, <1, 1...> --> sub X, <-1, -1...>
+/// sub X, <1, 1...> --> add X, <-1, -1...>
+/// The all-ones vector constant can be materialized using a pcmpeq instruction
+/// that is commonly recognized as an idiom (has no register dependency), so
+/// that's better/smaller than loading a splat 1 constant.
+static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Unexpected opcode for increment/decrement transform");
+
+ // Pseudo-legality check: getOnesVector() expects one of these types, so bail
+ // out and wait for legalization if we have an unsupported vector length.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ SDNode *N1 = N->getOperand(1).getNode();
+ APInt SplatVal;
+ if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue())
+ return SDValue();
+
+ SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
+ unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
+ return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
+}
+
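The rewrite is profitable because the all-ones vector comes from the dependency-free pcmpeq idiom instead of a constant-pool load, and it is sound because x + 1 == x - (-1) lane-wise in two's complement. A quick scalar check of that identity for one 32-bit lane (sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X : {0u, 1u, 0x7fffffffu, 0xffffffffu}) {
        uint32_t AllOnes = UINT32_MAX; // the value of one pcmpeq lane
        assert(X + 1u == X - AllOnes); // add X, 1  ==  sub X, -1
        assert(X - 1u == X + AllOnes); // sub X, 1  ==  add X, -1
      }
      return 0;
    }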
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const SDNodeFlags Flags = N->getFlags();
@@ -34995,6 +35110,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
+ if (SDValue V = combineIncDecVector(N, DAG))
+ return V;
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -35028,6 +35146,9 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
+ if (SDValue V = combineIncDecVector(N, DAG))
+ return V;
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -36335,3 +36456,22 @@ void X86TargetLowering::insertCopiesSplitCSR(
bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
+
+/// Returns the name of the symbol used to emit stack probes or the empty
+/// string if not applicable.
+StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
+ // If the function specifically requests stack probes, emit them.
+ if (MF.getFunction()->hasFnAttribute("probe-stack"))
+ return MF.getFunction()->getFnAttribute("probe-stack").getValueAsString();
+
+ // Generally, if we aren't on Windows, the platform ABI does not include
+ // support for stack probes, so don't emit them.
+ if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO())
+ return "";
+
+ // We need a stack probe to conform to the Windows ABI. Choose the right
+ // symbol.
+ if (Subtarget.is64Bit())
+ return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
+ return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
+}
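A standalone mirror of the Windows symbol choice above, usable as a sanity check (the booleans stand in for the Subtarget queries; probeSymbol is a hypothetical helper, not LLVM API):

    #include <cassert>
    #include <string>

    // Hypothetical mirror of the four Windows cases in getStackProbeSymbolName.
    static std::string probeSymbol(bool Is64Bit, bool IsCygMing) {
      if (Is64Bit)
        return IsCygMing ? "___chkstk_ms" : "__chkstk";
      return IsCygMing ? "_alloca" : "_chkstk";
    }

    int main() {
      assert(probeSymbol(true, false) == "__chkstk");     // Win64
      assert(probeSymbol(true, true) == "___chkstk_ms");  // 64-bit MinGW
      assert(probeSymbol(false, false) == "_chkstk");     // Win32
      assert(probeSymbol(false, true) == "_alloca");      // 32-bit MinGW/Cygwin
      return 0;
    }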
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index f51b6641db2fb..e1ade92979dc0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -615,7 +615,10 @@ namespace llvm {
// Vector truncating store with unsigned/signed saturation
VTRUNCSTOREUS, VTRUNCSTORES,
// Vector truncating masked store with unsigned/signed saturation
- VMTRUNCSTOREUS, VMTRUNCSTORES
+ VMTRUNCSTOREUS, VMTRUNCSTORES,
+
+ // X86 specific gather
+ MGATHER
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -1056,6 +1059,8 @@ namespace llvm {
bool supportSwiftError() const override;
+ StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
+
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
/// \brief Lower interleaved load(s) into target specific
@@ -1065,6 +1070,12 @@ namespace llvm {
ArrayRef<unsigned> Indices,
unsigned Factor) const override;
+ /// \brief Lower interleaved store(s) into target specific
+ /// instructions/intrinsics.
+ bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ unsigned Factor) const override;
+
+
void finalizeLowering(MachineFunction &MF) const override;
protected:
@@ -1397,6 +1408,19 @@ namespace llvm {
}
};
+ // X86 specific Gather node.
+ class X86MaskedGatherSDNode : public MaskedGatherScatterSDNode {
+ public:
+ X86MaskedGatherSDNode(unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT, MMO)
+ {}
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::MGATHER;
+ }
+ };
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 2620679df2517..01a70323224c3 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -7265,13 +7265,13 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let Predicates = [HasAVX512] in {
def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
@@ -7281,13 +7281,13 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
- addr:$src, (i32 0x1))), _.FRC)>;
+ addr:$src, (i32 0x9))), _.FRC)>;
def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
- addr:$src, (i32 0x2))), _.FRC)>;
+ addr:$src, (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
- addr:$src, (i32 0x3))), _.FRC)>;
+ addr:$src, (i32 0xb))), _.FRC)>;
def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0x4))), _.FRC)>;
@@ -7869,7 +7869,7 @@ let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mgatherv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vx64xmem, mgatherv2i64>, EVEX_V128;
+ vx64xmem, X86mgatherv2i64>, EVEX_V128;
}
}
@@ -8471,26 +8471,26 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
}
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
- (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
- (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
+ (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
def : Pat<(v16f32 (frint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
- (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;
+ (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
def : Pat<(v8f64 (ffloor VR512:$src)),
- (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
- (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
+ (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
- (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
+ (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
}
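The immediate changes above (0x1/0x2/0x3 to 0x9/0xA/0xB) keep the rounding mode in imm8[1:0] and additionally set imm8[3], which on VRNDSCALE suppresses the precision (inexact) exception; frint stays at 0x4 because rint is supposed to signal inexact, while fnearbyint already used 0xC. A small check of the bit arithmetic, assuming that imm8 layout:

    #include <cassert>

    int main() {
      const unsigned SuppressInexact = 0x8;    // imm8[3] on VRNDSCALE
      assert((0x1 | SuppressInexact) == 0x9);  // floor
      assert((0x2 | SuppressInexact) == 0xA);  // ceil
      assert((0x3 | SuppressInexact) == 0xB);  // trunc
      assert((0x4 | SuppressInexact) == 0xC);  // nearbyint vs. rint
      return 0;
    }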
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index c28b35b22977a..8b5bbf24f6f63 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -641,7 +641,7 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
-// Vector load wrappers to prevent folding of non-temporal aligned loads on
+// Vector load wrappers to prevent folding of non-temporal aligned loads on
// supporting targets.
def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() ||
@@ -754,16 +754,6 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-// These are needed to match a scalar memop that is used in a vector-only
-// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
-// The memory operand is required to be a 128-bit load, so it must be converted
-// from a vector to a scalar.
-def memopfsf32_128 : PatFrag<(ops node:$ptr),
- (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>;
-def memopfsf64_128 : PatFrag<(ops node:$ptr),
- (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>;
-
-
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
// FIXME: 8 byte alignment for mmx reads is not required
@@ -773,6 +763,9 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
+def X86masked_gather : SDNode<"X86ISD::MGATHER", SDTMaskedGather,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -796,6 +789,15 @@ def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
Mgt->getBasePtr().getValueType() == MVT::v2i64);
return false;
}]>;
+def X86mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (X86MaskedGatherSDNode *Mgt = dyn_cast<X86MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
+ Mgt->getBasePtr().getValueType() == MVT::v2i64) &&
+ (Mgt->getMemoryVT() == MVT::v2i32 ||
+ Mgt->getMemoryVT() == MVT::v2f32);
+ return false;
+}]>;
def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 8490b972eb5c1..fe87bbd994738 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1744,7 +1744,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
}
-def : Pat<(f32 (fpround FR64:$src)),
+def : Pat<(f32 (fpround FR64:$src)),
(VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
Requires<[UseAVX]>;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index 77dead8d24137..f98c2a7e802dd 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -72,9 +72,24 @@ private:
MachineFunction &MF) const;
bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
-
bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+
+ // emit insert subreg instruction and insert it before MachineInstr &I
+ bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
+ MachineRegisterInfo &MRI, MachineFunction &MF) const;
+ // emit extract subreg instruction and insert it before MachineInstr &I
+ bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
+ MachineRegisterInfo &MRI, MachineFunction &MF) const;
+
+ const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
+ const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
+ MachineRegisterInfo &MRI) const;
const X86TargetMachine &TM;
const X86Subtarget &STI;
@@ -113,8 +128,8 @@ X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
-static const TargetRegisterClass *
-getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
+const TargetRegisterClass *
+X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
if (RB.getID() == X86::GPRRegBankID) {
if (Ty.getSizeInBits() <= 8)
return &X86::GR8RegClass;
@@ -127,13 +142,13 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
}
if (RB.getID() == X86::VECRRegBankID) {
if (Ty.getSizeInBits() == 32)
- return &X86::FR32XRegClass;
+ return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
if (Ty.getSizeInBits() == 64)
- return &X86::FR64XRegClass;
+ return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
if (Ty.getSizeInBits() == 128)
- return &X86::VR128XRegClass;
+ return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
if (Ty.getSizeInBits() == 256)
- return &X86::VR256XRegClass;
+ return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
if (Ty.getSizeInBits() == 512)
return &X86::VR512RegClass;
}
@@ -141,10 +156,16 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
llvm_unreachable("Unknown RegBank!");
}
+const TargetRegisterClass *
+X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
+ MachineRegisterInfo &MRI) const {
+ const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
+ return getRegClass(Ty, RegBank);
+}
+
// Set X86 Opcode and constrain DestReg.
-static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) {
+bool X86InstructionSelector::selectCopy(MachineInstr &I,
+ MachineRegisterInfo &MRI) const {
unsigned DstReg = I.getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
@@ -171,7 +192,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
switch (RegBank.getID()) {
case X86::GPRRegBankID:
assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values.");
- RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+ RC = getRegClass(MRI.getType(DstReg), RegBank);
// Change the physical register
if (SrcSize > DstSize && TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
@@ -186,7 +207,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
}
break;
case X86::VECRRegBankID:
- RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+ RC = getRegClass(MRI.getType(DstReg), RegBank);
break;
default:
llvm_unreachable("Unknown RegBank!");
@@ -220,7 +241,7 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
// Certain non-generic instructions also need some special handling.
if (I.isCopy())
- return selectCopy(I, TII, MRI, TRI, RBI);
+ return selectCopy(I, MRI);
// TODO: handle more cases - LOAD_STACK_GUARD, PHI
return true;
@@ -249,6 +270,10 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectUadde(I, MRI, MF))
return true;
+ if (selectExtract(I, MRI, MF))
+ return true;
+ if (selectInsert(I, MRI, MF))
+ return true;
return false;
}
@@ -326,6 +351,34 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
return Opc;
}
+// Fill in an address from the given instruction.
+void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI,
+ X86AddressMode &AM) {
+
+ assert(I.getOperand(0).isReg() && "unsupported operand.");
+ assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
+ "unsupported type.");
+
+ if (I.getOpcode() == TargetOpcode::G_GEP) {
+ if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) {
+ int64_t Imm = *COff;
+ if (isInt<32>(Imm)) { // Check for displacement overflow.
+ AM.Disp = static_cast<int32_t>(Imm);
+ AM.Base.Reg = I.getOperand(1).getReg();
+ return;
+ }
+ }
+ } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ AM.Base.FrameIndex = I.getOperand(1).getIndex();
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ return;
+ }
+
+ // Default behavior.
+ AM.Base.Reg = I.getOperand(0).getReg();
+ return;
+}
+
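The G_GEP folding above only keeps constant offsets that fit x86's signed 32-bit displacement; isInt<32> behaves like this standalone check (a sketch, not the LLVM template itself):

    #include <cassert>
    #include <cstdint>

    static bool fitsDisp32(int64_t Imm) {
      return Imm >= INT32_MIN && Imm <= INT32_MAX;
    }

    int main() {
      assert(fitsDisp32(0x7fffffffLL));   // largest positive displacement
      assert(!fitsDisp32(0x80000000LL));  // one past it stays a register add
      assert(fitsDisp32(-0x80000000LL));  // most negative displacement
      return 0;
    }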
bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
@@ -340,18 +393,28 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
if (NewOpc == Opc)
return false;
+ X86AddressMode AM;
+ X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM);
+
I.setDesc(TII.get(NewOpc));
MachineInstrBuilder MIB(MF, I);
- if (Opc == TargetOpcode::G_LOAD)
- addOffset(MIB, 0);
- else {
+ if (Opc == TargetOpcode::G_LOAD) {
+ I.RemoveOperand(1);
+ addFullAddress(MIB, AM);
+ } else {
// G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
+ I.RemoveOperand(1);
I.RemoveOperand(0);
- addOffset(MIB, 0).addUse(DefReg);
+ addFullAddress(MIB, AM).addUse(DefReg);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
@@ -461,11 +524,11 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I,
if (DstRB.getID() != X86::GPRRegBankID)
return false;
- const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
+ const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
if (!DstRC)
return false;
- const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
+ const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
if (!SrcRC)
return false;
@@ -519,9 +582,8 @@ bool X86InstructionSelector::selectZext(MachineInstr &I,
else
return false;
- const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
unsigned DefReg =
- MRI.createVirtualRegister(getRegClassForTypeOnBank(DstTy, RegBank));
+ MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG), DefReg)
@@ -656,6 +718,202 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I,
return true;
}
+bool X86InstructionSelector::selectExtract(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ if (I.getOpcode() != TargetOpcode::G_EXTRACT)
+ return false;
+
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+ int64_t Index = I.getOperand(2).getImm();
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+
+ // For now, handle vector types only.
+ if (!DstTy.isVector())
+ return false;
+
+ if (Index % DstTy.getSizeInBits() != 0)
+ return false; // Not an extract of a subvector.
+
+ if (Index == 0) {
+ // Replace by extract subreg copy.
+ if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ bool HasAVX = STI.hasAVX();
+ bool HasAVX512 = STI.hasAVX512();
+ bool HasVLX = STI.hasVLX();
+
+ if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
+ if (HasVLX)
+ I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
+ else if (HasAVX)
+ I.setDesc(TII.get(X86::VEXTRACTF128rr));
+ else
+ return false;
+ } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
+ if (DstTy.getSizeInBits() == 128)
+ I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
+ else if (DstTy.getSizeInBits() == 256)
+ I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
+ else
+ return false;
+ } else
+ return false;
+
+ // Convert to X86 VEXTRACT immediate.
+ Index = Index / DstTy.getSizeInBits();
+ I.getOperand(2).setImm(Index);
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
+ MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ unsigned SubIdx = X86::NoSubRegister;
+
+ if (!DstTy.isVector() || !SrcTy.isVector())
+ return false;
+
+ assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
+ "Incorrect Src/Dst register size");
+
+ if (DstTy.getSizeInBits() == 128)
+ SubIdx = X86::sub_xmm;
+ else if (DstTy.getSizeInBits() == 256)
+ SubIdx = X86::sub_ymm;
+ else
+ return false;
+
+ const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
+ const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
+
+ SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
+ return false;
+ }
+
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
+ .addReg(SrcReg, 0, SubIdx);
+
+ return true;
+}
+
+bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
+ MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ unsigned SubIdx = X86::NoSubRegister;
+
+ // TODO: support scalar types
+ if (!DstTy.isVector() || !SrcTy.isVector())
+ return false;
+
+ assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
+ "Incorrect Src/Dst register size");
+
+ if (SrcTy.getSizeInBits() == 128)
+ SubIdx = X86::sub_xmm;
+ else if (SrcTy.getSizeInBits() == 256)
+ SubIdx = X86::sub_ymm;
+ else
+ return false;
+
+ const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
+ const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
+ return false;
+ }
+
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
+ .addReg(DstReg, RegState::DefineNoRead, SubIdx)
+ .addReg(SrcReg);
+
+ return true;
+}
+
+bool X86InstructionSelector::selectInsert(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ if (I.getOpcode() != TargetOpcode::G_INSERT)
+ return false;
+
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+ const unsigned InsertReg = I.getOperand(2).getReg();
+ int64_t Index = I.getOperand(3).getImm();
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT InsertRegTy = MRI.getType(InsertReg);
+
+ // For now, handle vector types only.
+ if (!DstTy.isVector())
+ return false;
+
+ if (Index % InsertRegTy.getSizeInBits() != 0)
+ return false; // Not an insert of a subvector.
+
+ if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
+ // Replace by subreg copy.
+ if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ bool HasAVX = STI.hasAVX();
+ bool HasAVX512 = STI.hasAVX512();
+ bool HasVLX = STI.hasVLX();
+
+ if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
+ if (HasVLX)
+ I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
+ else if (HasAVX)
+ I.setDesc(TII.get(X86::VINSERTF128rr));
+ else
+ return false;
+ } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
+ if (InsertRegTy.getSizeInBits() == 128)
+ I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
+ else if (InsertRegTy.getSizeInBits() == 256)
+ I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
+ else
+ return false;
+ } else
+ return false;
+
+ // Convert to X86 VINSERT immediate.
+ Index = Index / InsertRegTy.getSizeInBits();
+
+ I.getOperand(3).setImm(Index);
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &Subtarget,
diff --git a/lib/Target/X86/X86InterleavedAccess.cpp b/lib/Target/X86/X86InterleavedAccess.cpp
index 806d6cc888f0f..f0ed4bc16e2f9 100644
--- a/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/lib/Target/X86/X86InterleavedAccess.cpp
@@ -16,6 +16,7 @@
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
+#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;
@@ -50,9 +51,8 @@ class X86InterleavedAccessGroup {
IRBuilder<> &Builder;
/// \brief Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors
- /// sub vectors of type \p T. Returns true and the sub-vectors in
- /// \p DecomposedVectors if it decomposes the Inst, returns false otherwise.
- bool decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T,
+ /// sub vectors of type \p T. Returns the sub-vectors in \p DecomposedVectors.
+ void decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T,
SmallVectorImpl<Instruction *> &DecomposedVectors);
/// \brief Performs matrix transposition on a 4x4 matrix \p InputVectors and
@@ -80,8 +80,7 @@ public:
/// target information \p STarget.
explicit X86InterleavedAccessGroup(Instruction *I,
ArrayRef<ShuffleVectorInst *> Shuffs,
- ArrayRef<unsigned> Ind,
- const unsigned F,
+ ArrayRef<unsigned> Ind, const unsigned F,
const X86Subtarget &STarget,
IRBuilder<> &B)
: Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget),
@@ -102,48 +101,61 @@ bool X86InterleavedAccessGroup::isSupported() const {
uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy);
Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType();
- if (DL.getTypeSizeInBits(Inst->getType()) < Factor * ShuffleVecSize)
- return false;
+ // Currently, lowering is supported for 4-element vectors of 64 bits on AVX.
+ uint64_t ExpectedShuffleVecSize;
+ if (isa<LoadInst>(Inst))
+ ExpectedShuffleVecSize = 256;
+ else
+ ExpectedShuffleVecSize = 1024;
- // Currently, lowering is supported for 64 bits on AVX.
- if (!Subtarget.hasAVX() || ShuffleVecSize != 256 ||
+ if (!Subtarget.hasAVX() || ShuffleVecSize != ExpectedShuffleVecSize ||
DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4)
return false;
return true;
}
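The 256 vs. 1024 expected sizes follow from the supported shape (Factor = 4, 64-bit elements, VF = 4): a load group hands this code four v4i64 shuffle results, while a store group hands it one wide v16i64 shuffle. A quick arithmetic check (sketch):

    #include <cassert>

    int main() {
      const unsigned Factor = 4, EltBits = 64, VF = 4;
      assert(VF * EltBits == 256);            // per-member shuffle, load case
      assert(Factor * VF * EltBits == 1024);  // wide shuffle, store case
      return 0;
    }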
-bool X86InterleavedAccessGroup::decompose(
+void X86InterleavedAccessGroup::decompose(
Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
SmallVectorImpl<Instruction *> &DecomposedVectors) {
+
+ assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
+ "Expected Load or Shuffle");
+
Type *VecTy = VecInst->getType();
(void)VecTy;
assert(VecTy->isVectorTy() &&
DL.getTypeSizeInBits(VecTy) >=
DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
"Invalid Inst-size!!!");
- assert(VecTy->getVectorElementType() == SubVecTy->getVectorElementType() &&
- "Element type mismatched!!!");
- if (!isa<LoadInst>(VecInst))
- return false;
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) {
+ Value *Op0 = SVI->getOperand(0);
+ Value *Op1 = SVI->getOperand(1);
+
+ // Generate N(= NumSubVectors) shuffles of T(= SubVecTy) type.
+ for (unsigned i = 0; i < NumSubVectors; ++i)
+ DecomposedVectors.push_back(
+ cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, Indices[i],
+ SubVecTy->getVectorNumElements(), 0))));
+ return;
+ }
+ // Decompose the load instruction.
LoadInst *LI = cast<LoadInst>(VecInst);
Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace());
-
Value *VecBasePtr =
Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
- // Generate N loads of T type
+ // Generate N loads of T type.
for (unsigned i = 0; i < NumSubVectors; i++) {
- // TODO: Support inbounds GEP
+ // TODO: Support inbounds GEP.
Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i));
Instruction *NewLoad =
Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment());
DecomposedVectors.push_back(NewLoad);
}
-
- return true;
}
void X86InterleavedAccessGroup::transpose_4x4(
@@ -181,21 +193,46 @@ void X86InterleavedAccessGroup::transpose_4x4(
// instructions/intrinsics.
bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
SmallVector<Instruction *, 4> DecomposedVectors;
- VectorType *VecTy = Shuffles[0]->getType();
- // Try to generate target-sized register(/instruction).
- if (!decompose(Inst, Factor, VecTy, DecomposedVectors))
- return false;
-
SmallVector<Value *, 4> TransposedVectors;
- // Perform matrix-transposition in order to compute interleaved
- // results by generating some sort of (optimized) target-specific
- // instructions.
+ VectorType *ShuffleTy = Shuffles[0]->getType();
+
+ if (isa<LoadInst>(Inst)) {
+ // Try to generate target-sized register(/instruction).
+ decompose(Inst, Factor, ShuffleTy, DecomposedVectors);
+
+ // Perform matrix-transposition in order to compute interleaved
+ // results by generating some sort of (optimized) target-specific
+ // instructions.
+ transpose_4x4(DecomposedVectors, TransposedVectors);
+
+ // Now replace the unoptimized-interleaved-vectors with the
+ // transposed-interleaved vectors.
+ for (unsigned i = 0, e = Shuffles.size(); i < e; ++i)
+ Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]);
+
+ return true;
+ }
+
+ Type *ShuffleEltTy = ShuffleTy->getVectorElementType();
+ unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor;
+
+ // Lower the interleaved stores:
+ // 1. Decompose the interleaved wide shuffle into individual shuffle
+ // vectors.
+ decompose(Shuffles[0], Factor,
+ VectorType::get(ShuffleEltTy, NumSubVecElems), DecomposedVectors);
+
+ // 2. Transpose the interleaved-vectors into vectors of contiguous
+ // elements.
transpose_4x4(DecomposedVectors, TransposedVectors);
- // Now replace the unoptimized-interleaved-vectors with the
- // transposed-interleaved vectors.
- for (unsigned i = 0; i < Shuffles.size(); i++)
- Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]);
+ // 3. Concatenate the contiguous-vectors back into a wide vector.
+ Value *WideVec = concatenateVectors(Builder, TransposedVectors);
+
+ // 4. Generate a store instruction for wide-vec.
+ StoreInst *SI = cast<StoreInst>(Inst);
+ Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(),
+ SI->getAlignment());
return true;
}
@@ -220,3 +257,29 @@ bool X86TargetLowering::lowerInterleavedLoad(
return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}
+
+bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI,
+ ShuffleVectorInst *SVI,
+ unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+
+ assert(SVI->getType()->getVectorNumElements() % Factor == 0 &&
+ "Invalid interleaved store");
+
+ // Holds the indices of SVI that correspond to the starting index of each
+ // interleaved shuffle.
+ SmallVector<unsigned, 4> Indices;
+ auto Mask = SVI->getShuffleMask();
+ for (unsigned i = 0; i < Factor; i++)
+ Indices.push_back(Mask[i]);
+
+ ArrayRef<ShuffleVectorInst *> Shuffles = makeArrayRef(SVI);
+
+ // Create an interleaved access group.
+ IRBuilder<> Builder(SI);
+ X86InterleavedAccessGroup Grp(SI, Shuffles, Indices, Factor, Subtarget,
+ Builder);
+
+ return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
+}
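For a factor-4 interleaved store, the wide shuffle mask interleaves the four concatenated members, so its first Factor entries are exactly the per-member starting indices that the loop above collects. A standalone model with VF = 4 (the concrete mask below is an illustrative assumption):

    #include <cassert>
    #include <vector>

    int main() {
      // Mask of the wide shuffle feeding a factor-4 interleaved store.
      std::vector<int> Mask = {0, 4, 8, 12, 1, 5, 9, 13,
                               2, 6, 10, 14, 3, 7, 11, 15};
      const unsigned Factor = 4;
      std::vector<unsigned> Indices(Mask.begin(), Mask.begin() + Factor);
      assert((Indices == std::vector<unsigned>{0, 4, 8, 12}));
      return 0;
    }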
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index bc73bb1ae8c51..6b1add8ff8ed1 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -510,12 +510,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::SHUF128, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcasti64x4_512, BRCST_SUBVEC_TO_VEC,
X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,
@@ -524,16 +518,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM,
X86ISD::CMPM_RND),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC,
X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,
X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
@@ -1171,18 +1159,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FSUBS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_d_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_d_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_d_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_q_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_q_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_q_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index 979aaee110aa4..a584eabcc1b28 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -214,12 +214,24 @@ void X86LegalizerInfo::setLegalizerInfoAVX() {
if (!Subtarget.hasAVX())
return;
+ const LLT v16s8 = LLT::vector(16, 8);
+ const LLT v8s16 = LLT::vector(8, 16);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ const LLT v32s8 = LLT::vector(32, 8);
+ const LLT v16s16 = LLT::vector(16, 16);
const LLT v8s32 = LLT::vector(8, 32);
const LLT v4s64 = LLT::vector(4, 64);
for (unsigned MemOp : {G_LOAD, G_STORE})
for (auto Ty : {v8s32, v4s64})
setAction({MemOp, Ty}, Legal);
+
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+ setAction({G_INSERT, Ty}, Legal);
+ for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
+ setAction({G_INSERT, 1, Ty}, Legal);
}
void X86LegalizerInfo::setLegalizerInfoAVX2() {
@@ -243,6 +255,18 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
if (!Subtarget.hasAVX512())
return;
+ const LLT v16s8 = LLT::vector(16, 8);
+ const LLT v8s16 = LLT::vector(8, 16);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ const LLT v32s8 = LLT::vector(32, 8);
+ const LLT v16s16 = LLT::vector(16, 16);
+ const LLT v8s32 = LLT::vector(8, 32);
+ const LLT v4s64 = LLT::vector(4, 64);
+
+ const LLT v64s8 = LLT::vector(64, 8);
+ const LLT v32s16 = LLT::vector(32, 16);
const LLT v16s32 = LLT::vector(16, 32);
const LLT v8s64 = LLT::vector(8, 64);
@@ -256,13 +280,15 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
for (auto Ty : {v16s32, v8s64})
setAction({MemOp, Ty}, Legal);
+ for (auto Ty : {v64s8, v32s16, v16s32, v8s64})
+ setAction({G_INSERT, Ty}, Legal);
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64})
+ setAction({G_INSERT, 1, Ty}, Legal);
+
/************ VLX *******************/
if (!Subtarget.hasVLX())
return;
- const LLT v4s32 = LLT::vector(4, 32);
- const LLT v8s32 = LLT::vector(8, 32);
-
for (auto Ty : {v4s32, v8s32})
setAction({G_MUL, Ty}, Legal);
}
diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp
index dd21e2b7c4a13..8fdf10617059a 100644
--- a/lib/Target/X86/X86MacroFusion.cpp
+++ b/lib/Target/X86/X86MacroFusion.cpp
@@ -2,39 +2,31 @@
//
// The LLVM Compiler Infrastructure
//
-// \file This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
-// This file contains the X86 implementation of the DAG scheduling mutation to
-// pair instructions back to back.
+/// \file This file contains the X86 implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
#include "X86MacroFusion.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
-
-#define DEBUG_TYPE "misched"
-
-STATISTIC(NumFused, "Number of instr pairs fused");
+#include "llvm/CodeGen/MacroFusion.h"
using namespace llvm;
-static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden,
- cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-
-namespace {
-
-/// \brief Verify that the instruction pair, First and Second,
-/// should be scheduled back to back. If either instruction is unspecified,
-/// then verify that the other instruction may be part of a pair at all.
-static bool shouldScheduleAdjacent(const X86Subtarget &ST,
- const MachineInstr *First,
- const MachineInstr *Second) {
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
// Check if this processor supports macro-fusion. Since this is a minor
// heuristic, we haven't specifically reserved a feature. hasAVX is a decent
// proxy for SandyBridge+.
@@ -47,13 +39,10 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST,
FuseInc
} FuseKind;
- assert((First || Second) && "At least one instr must be specified");
- unsigned FirstOpcode = First
- ? First->getOpcode()
+ unsigned FirstOpcode = FirstMI
+ ? FirstMI->getOpcode()
: static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = Second
- ? Second->getOpcode()
- : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
switch (SecondOpcode) {
default:
@@ -203,69 +192,11 @@ static bool shouldScheduleAdjacent(const X86Subtarget &ST,
}
}
-/// \brief Post-process the DAG to create cluster edges between instructions
-/// that may be fused by the processor into a single operation.
-class X86MacroFusion : public ScheduleDAGMutation {
-public:
- X86MacroFusion() {}
-
- void apply(ScheduleDAGInstrs *DAGInstrs) override;
-};
-
-void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
- const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>();
-
- // For now, assume targets can only fuse with the branch.
- SUnit &ExitSU = DAG->ExitSU;
- MachineInstr *Branch = ExitSU.getInstr();
- if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch))
- return;
-
- for (SDep &PredDep : ExitSU.Preds) {
- if (PredDep.isWeak())
- continue;
- SUnit &SU = *PredDep.getSUnit();
- MachineInstr &Pred = *SU.getInstr();
- if (!shouldScheduleAdjacent(ST, &Pred, Branch))
- continue;
-
- // Create a single weak edge from SU to ExitSU. The only effect is to cause
- // bottom-up scheduling to heavily prioritize the clustered SU. There is no
- // need to copy predecessor edges from ExitSU to SU, since top-down
- // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
- // of SU, we could create an artificial edge from the deepest root, but it
- // hasn't been needed yet.
- bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
- (void)Success;
- assert(Success && "No DAG nodes should be reachable from ExitSU");
-
- // Adjust latency of data deps between the nodes.
- for (SDep &PredDep : ExitSU.Preds)
- if (PredDep.getSUnit() == &SU)
- PredDep.setLatency(0);
- for (SDep &SuccDep : SU.Succs)
- if (SuccDep.getSUnit() == &ExitSU)
- SuccDep.setLatency(0);
-
- ++NumFused;
- DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
- SU.print(dbgs(), DAG);
- dbgs() << " - ExitSU"
- << " / " << DAG->TII->getName(Pred.getOpcode()) << " - "
- << DAG->TII->getName(Branch->getOpcode()) << '\n';);
-
- break;
- }
-}
-
-} // end namespace
-
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createX86MacroFusionDAGMutation () {
- return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr;
+ return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
}
} // end namespace llvm
diff --git a/lib/Target/X86/X86MacroFusion.h b/lib/Target/X86/X86MacroFusion.h
index e630f802e8e63..13fa2d78a0185 100644
--- a/lib/Target/X86/X86MacroFusion.h
+++ b/lib/Target/X86/X86MacroFusion.h
@@ -2,23 +2,18 @@
//
// The LLVM Compiler Infrastructure
//
-// \file This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
-// This file contains the X86 definition of the DAG scheduling mutation to pair
-// instructions back to back.
+/// \file This file contains the X86 definition of the DAG scheduling mutation
+/// to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
-#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
-//===----------------------------------------------------------------------===//
-// X86MacroFusion - DAG post-processing to encourage fusion of macro ops.
-//===----------------------------------------------------------------------===//
-
namespace llvm {
/// Note that you have to add:
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 278b57eb00b74..a9f42cacf7886 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -91,6 +91,8 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return llvm::make_unique<X86FreeBSDTargetObjectFile>();
if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU())
return llvm::make_unique<X86LinuxNaClTargetObjectFile>();
+ if (TT.isOSSolaris())
+ return llvm::make_unique<X86SolarisTargetObjectFile>();
if (TT.isOSFuchsia())
return llvm::make_unique<X86FuchsiaTargetObjectFile>();
if (TT.isOSBinFormatELF())
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 4fd95717478e9..8627c06d44313 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -86,6 +86,12 @@ X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(TM.Options.UseInitArray);
}
+void X86SolarisTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
const MCExpr *X86WindowsTargetObjectFile::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 39d2e84e5ed77..f6aa570b6332a 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -66,6 +66,11 @@ namespace llvm {
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
};
+ /// \brief This implementation is used for Solaris on x86/x86-64.
+ class X86SolarisTargetObjectFile : public X86ELFTargetObjectFile {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ };
+
/// \brief This implementation is used for Windows targets on x86 and x86-64.
class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF {
const MCExpr *
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 11ba7025e1b73..5ba8534d32d33 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2178,17 +2178,6 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
}
-bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2) {
- // X86 specific here are "instruction number 1st priority".
- return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
- C1.NumIVMuls, C1.NumBaseAdds,
- C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
- std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
- C2.NumIVMuls, C2.NumBaseAdds,
- C2.ScaleCost, C2.ImmCost, C2.SetupCost);
-}
-
bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
Type *ScalarTy = DataTy->getScalarType();
int DataWidth = isa<PointerType>(ScalarTy) ?
@@ -2243,6 +2232,12 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
+bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
+ // TODO: We can increase these based on available vector ops.
+ MaxLoadSize = ST->is64Bit() ? 8 : 4;
+ return true;
+}
+
bool X86TTIImpl::enableInterleavedAccessVectorization() {
// TODO: We expect this to be beneficial regardless of arch,
// but there are currently some unexplained performance artifacts on Atom.
@@ -2250,6 +2245,114 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
return !(ST->isAtom());
}
+// Get estimation for interleaved load/store operations for AVX2.
+// \p Factor is the interleaved-access factor (stride) - number of
+// (interleaved) elements in the group.
+// \p Indices contains the indices for a strided load: when the
+// interleaved load has gaps they indicate which elements are used.
+// If Indices is empty (or if the number of indices is equal to the size
+// of the interleaved-access as given in \p Factor) the access has no gaps.
+//
+// As opposed to AVX-512, AVX2 does not have generic shuffles that allow
+// computing the cost using a generic formula as a function of generic
+// shuffles. We therefore use a lookup table instead, filled according to
+// the instruction sequences that codegen currently generates.
+int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+
+ // We currently support only fully-interleaved groups, with no gaps.
+ // TODO: Support also strided loads (interleaved-groups with gaps).
+ if (Indices.size() && Indices.size() != Factor)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+
+ // VecTy for interleave memop is <VF*Factor x Elt>.
+ // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
+ // VecTy = <12 x i32>.
+ MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
+
+ // This function can be called with VecTy=<6xi128>, Factor=3, in which case
+ // the VF=2, while v2i128 is an unsupported MVT vector type
+ // (see MachineValueType.h::getVectorVT()).
+ if (!LegalVT.isVector())
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+
+ unsigned VF = VecTy->getVectorNumElements() / Factor;
+ Type *ScalarTy = VecTy->getVectorElementType();
+
+ // Calculate the number of memory operations (NumOfMemOps) required
+ // to load/store the VecTy.
+ unsigned VecTySize = DL.getTypeStoreSize(VecTy);
+ unsigned LegalVTSize = LegalVT.getStoreSize();
+ unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
+
+ // Get the cost of one memory operation.
+ Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
+ LegalVT.getVectorNumElements());
+ unsigned MemOpCost =
+ getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+
+ VectorType *VT = VectorType::get(ScalarTy, VF);
+ EVT ETy = TLI->getValueType(DL, VT);
+ if (!ETy.isSimple())
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+
+ // TODO: Complete for other data-types and strides.
+ // Each combination of Stride, ElementTy and VF results in a different
+ // sequence; the cost tables are therefore accessed with:
+ // Factor (stride) and VectorType=VFxElemType.
+ // The Cost accounts only for the shuffle sequence;
+ // the cost of the loads/stores is accounted for separately.
+ //
+ static const CostTblEntry AVX2InterleavedLoadTbl[] = {
+ { 3, MVT::v2i8, 10 }, //(load 6i8 and) deinterleave into 3 x 2i8
+ { 3, MVT::v4i8, 4 }, //(load 12i8 and) deinterleave into 3 x 4i8
+ { 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8
+ { 3, MVT::v16i8, 18 }, //(load 48i8 and) deinterleave into 3 x 16i8
+ { 3, MVT::v32i8, 42 }, //(load 96i8 and) deinterleave into 3 x 32i8
+
+ { 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8
+ { 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8
+ { 4, MVT::v8i8, 20 }, //(load 32i8 and) deinterleave into 4 x 8i8
+ { 4, MVT::v16i8, 39 }, //(load 64i8 and) deinterleave into 4 x 16i8
+ { 4, MVT::v32i8, 80 } //(load 128i8 and) deinterleave into 4 x 32i8
+ };
+
+ static const CostTblEntry AVX2InterleavedStoreTbl[] = {
+ { 3, MVT::v2i8, 7 }, //interleave 3 x 2i8 into 6i8 (and store)
+ { 3, MVT::v4i8, 8 }, //interleave 3 x 4i8 into 12i8 (and store)
+ { 3, MVT::v8i8, 11 }, //interleave 3 x 8i8 into 24i8 (and store)
+ { 3, MVT::v16i8, 17 }, //interleave 3 x 16i8 into 48i8 (and store)
+ { 3, MVT::v32i8, 32 }, //interleave 3 x 32i8 into 96i8 (and store)
+
+ { 4, MVT::v2i8, 12 }, //interleave 4 x 2i8 into 8i8 (and store)
+ { 4, MVT::v4i8, 9 }, //interleave 4 x 4i8 into 16i8 (and store)
+ { 4, MVT::v8i8, 16 }, //interleave 4 x 8i8 into 32i8 (and store)
+ { 4, MVT::v16i8, 20 }, //interleave 4 x 16i8 into 64i8 (and store)
+ { 4, MVT::v32i8, 40 } //interleave 4 x 32i8 into 128i8 (and store)
+ };
+
+ if (Opcode == Instruction::Load) {
+ if (const auto *Entry =
+ CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT()))
+ return NumOfMemOps * MemOpCost + Entry->Cost;
+ } else {
+ assert(Opcode == Instruction::Store &&
+ "Expected Store Instruction at this point");
+ if (const auto *Entry =
+ CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT()))
+ return NumOfMemOps * MemOpCost + Entry->Cost;
+ }
+
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+}
+
// Get estimation for interleaved load/store operations and strided load.
// \p Indices contains indices for strided load.
// \p Factor - the factor of interleaving.
@@ -2358,6 +2461,10 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
if (ST->hasAVX512() && HasAVX512Solution && (!RequiresBW || ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace);
+ if (ST->hasAVX2())
+ return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace);
}
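As a standalone sketch of how the AVX2 cost lookup above composes (not LLVM code; the table rows are copied from the patch, everything else is illustrative): the number of legal-width memory operations is a ceiling division, and the shuffle cost comes from the per-(Factor, type) table.

    #include <cstdio>
    #include <string_view>

    struct CostEntry { int Factor; std::string_view VT; int ShuffleCost; };

    // A few rows mirrored from AVX2InterleavedLoadTbl above.
    constexpr CostEntry LoadTbl[] = {
        {3, "v8i8", 9}, {3, "v16i8", 18}, {4, "v16i8", 39},
    };

    // Total = (memory ops needed to cover the wide vector) * per-op cost
    // plus the table's shuffle-sequence cost. NumOfMemOps is a ceiling
    // division, exactly as in the patch.
    int interleavedLoadCost(int Factor, std::string_view VT, int VecTyBytes,
                            int LegalVTBytes, int MemOpCost) {
      int NumOfMemOps = (VecTyBytes + LegalVTBytes - 1) / LegalVTBytes;
      for (const CostEntry &E : LoadTbl)
        if (E.Factor == Factor && E.VT == VT)
          return NumOfMemOps * MemOpCost + E.ShuffleCost;
      return -1; // no table entry: defer to the generic cost model
    }

    int main() {
      // A <48 x i8> interleaved load, Factor=3 (so VF=16): two 32-byte
      // legal ops at an assumed per-op cost of 1, plus shuffle cost 18.
      std::printf("%d\n", interleavedLoadCost(3, "v16i8", 48, 32, 1)); // 20
    }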
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index 09ce2c90498d9..ad0a0a2113012 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -93,6 +93,9 @@ public:
int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
unsigned Alignment, unsigned AddressSpace);
+ int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
+ unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace);
int getIntImmCost(int64_t);
@@ -101,15 +104,13 @@ public:
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
- bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2);
bool isLegalMaskedLoad(Type *DataType);
bool isLegalMaskedStore(Type *DataType);
bool isLegalMaskedGather(Type *DataType);
bool isLegalMaskedScatter(Type *DataType);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
-
+ bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
diff --git a/lib/Testing/Support/LLVMBuild.txt b/lib/Testing/Support/LLVMBuild.txt
index 40853e8172d55..173cfb4a5587e 100644
--- a/lib/Testing/Support/LLVMBuild.txt
+++ b/lib/Testing/Support/LLVMBuild.txt
@@ -20,3 +20,4 @@ type = Library
name = TestingSupport
parent = Libraries
required_libraries = Support
+installed = 0
diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 797e4ffc2d456..f304b9c9a8dac 100644
--- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -31,7 +31,7 @@ namespace {
enum {
OPT_INVALID = 0,
-#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID,
+#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
#include "Options.inc"
#undef OPTION
};
@@ -41,11 +41,9 @@ enum {
#undef PREFIX
static const llvm::opt::OptTable::Info infoTable[] = {
-#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \
- { \
- X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \
- OPT_##GROUP, OPT_##ALIAS, X6 \
- },
+#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10, X11) \
+ {X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, \
+ X8, X7, OPT_##GROUP, OPT_##ALIAS, X6, X11},
#include "Options.inc"
#undef OPTION
};
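The LibDriver change above adapts to an Options.inc whose generated OPTION records grew a twelfth field. A minimal X-macro sketch of that pattern (all names here are stand-ins, not the real tablegen output):

    #include <cstdio>

    // Stand-in for the tablegen'd Options.inc: each record now carries
    // twelve fields, hence the extra _12 parameter in the patch above.
    #define MY_OPTIONS_INC                                                   \
      OPTION(p1, p2, HELP, Flag, G, A, f6, f7, f8, f9, f10, f11)             \
      OPTION(p1, p2, OUT, Joined, G, A, f6, f7, f8, f9, f10, f11)

    enum {
      OPT_INVALID = 0,
    #define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
      MY_OPTIONS_INC
    #undef OPTION
    };

    int main() { std::printf("%d %d\n", OPT_HELP, OPT_OUT); } // prints 1 2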
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4bc64ab698ff9..087a8aa2c624d 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -72,10 +72,6 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
-static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
- cl::Hidden,
- cl::desc("Run the load combining pass"));
-
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));
@@ -174,7 +170,6 @@ PassManagerBuilder::PassManagerBuilder() {
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
- LoadCombine = RunLoadCombine;
NewGVN = RunNewGVN;
DisableGVNLoadPRE = false;
VerifyInput = false;
@@ -296,6 +291,8 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
InstrProfOptions Options;
if (!PGOInstrGen.empty())
Options.InstrProfileOutput = PGOInstrGen;
+ Options.DoCounterPromotion = true;
+ MPM.add(createLoopRotatePass());
MPM.add(createInstrProfilingLegacyPass(Options));
}
if (!PGOInstrUse.empty())
@@ -407,9 +404,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
}
}
- if (LoadCombine)
- MPM.add(createLoadCombinePass());
-
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Clean up after everything.
@@ -850,9 +844,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// alignments.
PM.add(createAlignmentFromAssumptionsPass());
- if (LoadCombine)
- PM.add(createLoadCombinePass());
-
// Cleanup and simplify the code after the scalar optimizations.
addInstructionCombiningPass(PM);
addExtensionsToPM(EP_Peephole, PM);
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 67bc8f5f6b7ad..656421ee58df8 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -690,6 +690,9 @@ bool SampleProfileLoader::inlineHotFunctions(
for (auto I : CIS) {
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
Function *CalledFunction = CallSite(I).getCalledFunction();
+ // Do not inline recursive calls.
+ if (CalledFunction == &F)
+ continue;
Instruction *DI = I;
if (!CalledFunction && !PromotedInsns.count(I) &&
CallSite(I).isIndirectCall())
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 287a5167fe2ae..d5f0dd1914157 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -988,15 +988,24 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add,
return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
}
- // Shifts and add used to flip and mask off the low bit:
- // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
- const APInt *C3;
- if (C->isOneValue() &&
- match(Op0,
- m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3)))) &&
- C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
- Value *NotX = Builder.CreateNot(X);
- return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
+ if (C->isOneValue() && Op0->hasOneUse()) {
+ // add (sext i1 X), 1 --> zext (not X)
+ // TODO: The smallest IR representation is (select X, 0, 1), and that would
+ // not require the one-use check. But we need to remove a transform in
+ // visitSelect and make sure that IR value tracking for select is equal or
+ // better than for these ops.
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ X->getType()->getScalarSizeInBits() == 1)
+ return new ZExtInst(Builder.CreateNot(X), Ty);
+
+ // Shifts and add used to flip and mask off the low bit:
+ // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
+ const APInt *C3;
+ if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) &&
+ C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
+ Value *NotX = Builder.CreateNot(X);
+ return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
+ }
}
return nullptr;
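Both folds in this hunk can be checked exhaustively outside LLVM. A small self-contained verification (assumes two's-complement conversions and arithmetic right shift, guaranteed since C++20):

    #include <cassert>
    #include <cstdint>

    // sext/zext of an i1 to i32, modeled directly.
    int32_t sextI1(bool X) { return X ? -1 : 0; }
    int32_t zextI1(bool X) { return X ? 1 : 0; }

    int main() {
      // add (sext i1 X), 1 --> zext (not X)
      for (bool X : {false, true})
        assert(sextI1(X) + 1 == zextI1(!X));

      // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
      for (int32_t X = -4; X <= 4; ++X) {
        int32_t Shl = (int32_t)((uint32_t)X << 31);
        assert(((Shl >> 31) + 1) == (~X & 1));
      }
    }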
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a881bda5ba98d..d3d8cefe97353 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1097,20 +1097,11 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
Type *DestTy = Logic.getType();
Type *SrcTy = Cast->getSrcTy();
- // If the first operand is bitcast, move the logic operation ahead of the
- // bitcast (do the logic operation in the original type). This can eliminate
- // bitcasts and allow combines that would otherwise be impeded by the bitcast.
+ // Move the logic operation ahead of a zext if the constant is unchanged in
+ // the smaller source type. Performing the logic in a smaller type may provide
+ // more information to later folds, and the smaller logic instruction may be
+ // cheaper (particularly in the case of vectors).
Value *X;
- if (match(Cast, m_BitCast(m_Value(X)))) {
- Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy);
- Value *NewOp = Builder->CreateBinOp(LogicOpc, X, NewConstant);
- return CastInst::CreateBitOrPointerCast(NewOp, DestTy);
- }
-
- // Similarly, move the logic operation ahead of a zext if the constant is
- // unchanged in the smaller source type. Performing the logic in a smaller
- // type may provide more information to later folds, and the smaller logic
- // instruction may be cheaper (particularly in the case of vectors).
if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy);
@@ -1239,9 +1230,10 @@ static Instruction *foldAndToXor(BinaryOperator &I,
// (A | ~B) & (B | ~A) --> ~(A ^ B)
// (~B | A) & (~A | B) --> ~(A ^ B)
// (~B | A) & (B | ~A) --> ~(A ^ B)
- if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
- return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+ if (Op0->hasOneUse() || Op1->hasOneUse())
+ if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
return nullptr;
}
@@ -1256,9 +1248,10 @@ static Instruction *foldOrToXor(BinaryOperator &I,
// Operand complexity canonicalization guarantees that the 'and' is Op0.
// (A & B) | ~(A | B) --> ~(A ^ B)
// (A & B) | ~(B | A) --> ~(A ^ B)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
- return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+ if (Op0->hasOneUse() || Op1->hasOneUse())
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
// (A & ~B) | (~A & B) --> A ^ B
// (A & ~B) | (B & ~A) --> A ^ B
@@ -1442,13 +1435,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
- if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
+ if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C));
// ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
- if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
+ if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C));
// (A | B) & ((~A) ^ B) -> (A & B)
@@ -1579,11 +1572,14 @@ static Value *getSelectCondition(Value *A, Value *B,
// If A and B are sign-extended, look through the sexts to find the booleans.
Value *Cond;
+ Value *NotB;
if (match(A, m_SExt(m_Value(Cond))) &&
Cond->getType()->getScalarType()->isIntegerTy(1) &&
- match(B, m_CombineOr(m_Not(m_SExt(m_Specific(Cond))),
- m_SExt(m_Not(m_Specific(Cond))))))
- return Cond;
+ match(B, m_OneUse(m_Not(m_Value(NotB))))) {
+ NotB = peekThroughBitcast(NotB, true);
+ if (match(NotB, m_SExt(m_Specific(Cond))))
+ return Cond;
+ }
// All scalar (and most vector) possibilities should be handled now.
// Try more matches that only apply to non-splat constant vectors.
@@ -1615,12 +1611,8 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
// The potential condition of the select may be bitcasted. In that case, look
// through its bitcast and the corresponding bitcast of the 'not' condition.
Type *OrigType = A->getType();
- Value *SrcA, *SrcB;
- if (match(A, m_OneUse(m_BitCast(m_Value(SrcA)))) &&
- match(B, m_OneUse(m_BitCast(m_Value(SrcB))))) {
- A = SrcA;
- B = SrcB;
- }
+ A = peekThroughBitcast(A, true);
+ B = peekThroughBitcast(B, true);
if (Value *Cond = getSelectCondition(A, B, Builder)) {
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
@@ -1922,8 +1914,9 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
/// (A & C1) | B
///
/// when the XOR of the two constants is "all ones" (-1).
-Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C) {
+static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C,
+ InstCombiner::BuilderTy *Builder) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
if (!CI1) return nullptr;
@@ -1944,15 +1937,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
/// \brief This helper function folds:
///
-/// ((A | B) & C1) ^ (B & C2)
+/// ((A ^ B) & C1) | (B & C2)
///
/// into:
///
/// (A & C1) ^ B
///
/// when the XOR of the two constants is "all ones" (-1).
-Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C) {
+static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C,
+ InstCombiner::BuilderTy *Builder) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
if (!CI1)
return nullptr;
@@ -2112,46 +2106,36 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// ((A|B)&1)|(B&-2) -> (A&1) | B
- if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
- match(A, m_Or(m_Specific(B), m_Value(V1)))) {
- Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
- if (Ret) return Ret;
+ if (match(A, m_c_Or(m_Value(V1), m_Specific(B)))) {
+ if (Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C, Builder))
+ return Ret;
}
// (B&-2)|((A|B)&1) -> (A&1) | B
- if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
- match(B, m_Or(m_Value(V1), m_Specific(A)))) {
- Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
- if (Ret) return Ret;
+ if (match(B, m_c_Or(m_Specific(A), m_Value(V1)))) {
+ if (Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D, Builder))
+ return Ret;
}
// ((A^B)&1)|(B&-2) -> (A&1) ^ B
- if (match(A, m_Xor(m_Value(V1), m_Specific(B))) ||
- match(A, m_Xor(m_Specific(B), m_Value(V1)))) {
- Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C);
- if (Ret) return Ret;
+ if (match(A, m_c_Xor(m_Value(V1), m_Specific(B)))) {
+ if (Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C, Builder))
+ return Ret;
}
// (B&-2)|((A^B)&1) -> (A&1) ^ B
- if (match(B, m_Xor(m_Specific(A), m_Value(V1))) ||
- match(B, m_Xor(m_Value(V1), m_Specific(A)))) {
- Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D);
- if (Ret) return Ret;
+ if (match(B, m_c_Xor(m_Specific(A), m_Value(V1)))) {
+ if (Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D, Builder))
+ return Ret;
}
}
// (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
- // FIXME: The two hasOneUse calls here are the same call, maybe we were
- // supposed to check Op1->operand(0)?
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
- if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
- return BinaryOperator::CreateOr(Op0, C);
+ return BinaryOperator::CreateOr(Op0, C);
// ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
- // FIXME: The two hasOneUse calls here are the same call, maybe we were
- // supposed to check Op0->operand(0)?
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
- if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
- return BinaryOperator::CreateOr(Op1, C);
+ return BinaryOperator::CreateOr(Op1, C);
// ((B | C) & A) | B -> B | (A & C)
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
@@ -2357,6 +2341,30 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ // Instead of trying to imitate the folds for and/or, decompose this 'xor'
+ // into those logic ops. That is, try to turn this into an and-of-icmps
+ // because we have many folds for that pattern.
+ //
+ // This is based on a truth table definition of xor:
+ // X ^ Y --> (X | Y) & !(X & Y)
+ if (Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
+ // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
+ // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
+ if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
+ // TODO: Independently handle cases where the 'and' side is a constant.
+ if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) {
+ // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS
+ RHS->setPredicate(RHS->getInversePredicate());
+ return Builder->CreateAnd(LHS, RHS);
+ }
+ if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) {
+ // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS
+ LHS->setPredicate(LHS->getInversePredicate());
+ return Builder->CreateAnd(LHS, RHS);
+ }
+ }
+ }
+
return nullptr;
}
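The xor-of-icmps decomposition and the and-to-xor fold above both rest on bitwise identities that a brute-force check confirms; a minimal sketch:

    #include <cassert>

    int main() {
      // X ^ Y --> (X | Y) & !(X & Y), the truth-table identity the
      // xor-of-icmps fold is built on.
      for (bool X : {false, true})
        for (bool Y : {false, true})
          assert((X != Y) == ((X || Y) && !(X && Y)));

      // The identity behind foldAndToXor, checked on all 8-bit pairs:
      // (A | ~B) & (B | ~A) --> ~(A ^ B)
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B)
          assert((((A | ~B) & (B | ~A)) & 0xFFu) == (~(A ^ B) & 0xFFu));
    }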
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c0830a5d21124..dbed7ad4eae84 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1409,6 +1409,47 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
}
}
+ // Add range metadata since known bits can't completely reflect what we know.
+ // TODO: Handle splat vectors.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
+ return nullptr;
+}
+
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+ assert(II.getIntrinsicID() == Intrinsic::ctpop &&
+ "Expected ctpop intrinsic");
+ Value *Op0 = II.getArgOperand(0);
+ // FIXME: Try to simplify vectors of integers.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (!IT)
+ return nullptr;
+
+ unsigned BitWidth = IT->getBitWidth();
+ KnownBits Known(BitWidth);
+ IC.computeKnownBits(Op0, Known, 0, &II);
+
+ unsigned MinCount = Known.countMinPopulation();
+ unsigned MaxCount = Known.countMaxPopulation();
+
+ // Add range metadata since known bits can't completely reflect what we know.
+ if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
return nullptr;
}
@@ -1981,6 +2022,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return I;
break;
+ case Intrinsic::ctpop:
+ if (auto *I = foldCtpop(*II, *this))
+ return I;
+ break;
+
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::umul_with_overflow:
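How known bits bound a population count, as used for the !range metadata in foldCtpop above; a sketch with illustrative masks (C++20 for std::popcount):

    #include <bit>      // std::popcount
    #include <cassert>
    #include <cstdint>

    int main() {
      // Suppose value analysis proved these masks (illustrative values).
      uint32_t KnownOnes  = 0x00000003; // bits known to be 1
      uint32_t KnownZeros = 0xFFFF0000; // bits known to be 0
      unsigned MinCount = std::popcount(KnownOnes);        // countMinPopulation
      unsigned MaxCount = 32u - std::popcount(KnownZeros); // countMaxPopulation
      assert(MinCount == 2 && MaxCount == 16);
      // The fold would attach !range [MinCount, MaxCount + 1) to the
      // ctpop call, i.e. [2, 17) here.
    }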
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 38e95fb116396..d3049389dfb9f 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1896,6 +1896,18 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
}
+ // Canonicalize vector bitcasts to come before vector bitwise logic with a
+ // constant. This eases recognition of special constants for later ops.
+ // Example:
+ // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
+ Constant *C;
+ if (match(BO->getOperand(1), m_Constant(C))) {
+ // bitcast (logic X, C) --> logic (bitcast X, C')
+ Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
+ Value *CastedC = ConstantExpr::getBitCast(C, DestTy);
+ return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
+ }
+
return nullptr;
}
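The signmask example in the comment above (icmp u/s of xor'd operands flips to icmp s/u) is easy to confirm exhaustively at 8 bits; a standalone check, assuming two's-complement narrowing conversions:

    #include <cassert>
    #include <cstdint>

    int main() {
      // icmp ult (a ^ 0x80), (b ^ 0x80) <=> icmp slt a, b
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B) {
          uint8_t UA = (uint8_t)(A ^ 0x80), UB = (uint8_t)(B ^ 0x80);
          int8_t SA = (int8_t)A, SB = (int8_t)B;
          assert((UA < UB) == (SA < SB));
        }
    }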
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 1ef4acfb058c4..6ad32490a3288 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2434,6 +2434,77 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return nullptr;
}
+bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
+ Value *&RHS, ConstantInt *&Less,
+ ConstantInt *&Equal,
+ ConstantInt *&Greater) {
+ // TODO: Generalize this to work with other comparison idioms or ensure
+ // they get canonicalized into this form.
+
+ // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32
+ // Greater), where Equal, Less and Greater are placeholders for any three
+ // constants.
+ ICmpInst::Predicate PredA, PredB;
+ if (match(SI->getTrueValue(), m_ConstantInt(Equal)) &&
+ match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) &&
+ PredA == ICmpInst::ICMP_EQ &&
+ match(SI->getFalseValue(),
+ m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)),
+ m_ConstantInt(Less), m_ConstantInt(Greater))) &&
+ PredB == ICmpInst::ICMP_SLT) {
+ return true;
+ }
+ return false;
+}
+
+Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
+ Instruction *Select,
+ ConstantInt *C) {
+
+ assert(C && "Cmp RHS should be a constant int!");
+ // If we're testing a constant value against the result of a three way
+ // comparison, the result can be expressed directly in terms of the
+ // original values being compared. Note: We could possibly be more
+ // aggressive here and remove the hasOneUse test. The original select is
+ // really likely to simplify or sink when we remove a test of the result.
+ Value *OrigLHS, *OrigRHS;
+ ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
+ if (Cmp.hasOneUse() &&
+ matchThreeWayIntCompare(cast<SelectInst>(Select), OrigLHS, OrigRHS,
+ C1LessThan, C2Equal, C3GreaterThan)) {
+ assert(C1LessThan && C2Equal && C3GreaterThan);
+
+ bool TrueWhenLessThan =
+ ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
+ ->isAllOnesValue();
+ bool TrueWhenEqual =
+ ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
+ ->isAllOnesValue();
+ bool TrueWhenGreaterThan =
+ ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
+ ->isAllOnesValue();
+
+ // This generates the new instruction that will replace the original Cmp
+ // Instruction. Instead of enumerating the various combinations when
+ // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
+ // false, we rely on chaining of ORs and future passes of InstCombine to
+ // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
+
+ // When none of the three constants satisfy the predicate for the RHS (C),
+ // the entire original Cmp can be simplified to false.
+ Value *Cond = Builder->getFalse();
+ if (TrueWhenLessThan)
+ Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
+ if (TrueWhenEqual)
+ Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
+ if (TrueWhenGreaterThan)
+ Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
+
+ return replaceInstUsesWith(Cmp, Cond);
+ }
+ return nullptr;
+}
+
/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
/// where X is some kind of instruction.
Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
@@ -2493,11 +2564,28 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
return I;
}
+ // Match against CmpInst LHS being instructions other than binary operators.
Instruction *LHSI;
- if (match(Cmp.getOperand(0), m_Instruction(LHSI)) &&
- LHSI->getOpcode() == Instruction::Trunc)
- if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C))
- return I;
+ if (match(Cmp.getOperand(0), m_Instruction(LHSI))) {
+ switch (LHSI->getOpcode()) {
+ case Instruction::Select:
+ {
+ // For now, we only support constant integers while folding the
+ // ICMP(SELECT) pattern. We can extend this to support vectors of integers,
+ // similar to the cases handled by binary ops above.
+ if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
+ if (Instruction *I = foldICmpSelectConstant(Cmp, LHSI, ConstRHS))
+ return I;
+ break;
+ }
+ case Instruction::Trunc:
+ if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C))
+ return I;
+ break;
+ default:
+ break;
+ }
+ }
if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, C))
return I;
@@ -3110,8 +3198,7 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
if (BO0) {
// Transform A & (L - 1) `ult` L --> L != 0
auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
- auto BitwiseAnd =
- m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+ auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
auto *Zero = Constant::getNullValue(BO0->getType());
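A standalone model of the foldICmpSelectConstant transform above: a three-way compare select chain tested against a constant collapses to an OR of direct comparisons, with each branch pre-evaluated against its constant. All names here are illustrative, not LLVM code:

    #include <cassert>

    int threeWay(int L, int R, int Less, int Equal, int Greater) {
      return L == R ? Equal : (L < R ? Less : Greater);
    }

    bool folded(int L, int R, bool TrueWhenLT, bool TrueWhenEQ, bool TrueWhenGT) {
      bool Cond = false;                      // Builder->getFalse()
      if (TrueWhenLT) Cond = Cond || (L < R); // chained ORs, as in the patch
      if (TrueWhenEQ) Cond = Cond || (L == R);
      if (TrueWhenGT) Cond = Cond || (L > R);
      return Cond;
    }

    int main() {
      const int Less = -1, Equal = 0, Greater = 1, C = 0;
      // Original: icmp sgt (threeWay ...), C. Evaluate each constant first.
      bool TLT = Less > C, TEQ = Equal > C, TGT = Greater > C;
      for (int L = -2; L <= 2; ++L)
        for (int R = -2; R <= 2; ++R)
          assert((threeWay(L, R, Less, Equal, Greater) > C) ==
                 folded(L, R, TLT, TEQ, TGT));
    }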
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 1a7db146df426..1b0fe84dd4dda 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -95,6 +95,18 @@ static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) {
}
}
+/// Return the source operand of a potentially bitcasted value while optionally
+/// checking if it has one use. If there is no bitcast or the one use check is
+/// not met, return the input value itself.
+static inline Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) {
+ if (auto *BitCast = dyn_cast<BitCastInst>(V))
+ if (!OneUseOnly || BitCast->hasOneUse())
+ return BitCast->getOperand(0);
+
+ // V is not a bitcast or V has more than one use and OneUseOnly is true.
+ return V;
+}
+
/// \brief Add one to a Constant
static inline Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
@@ -276,10 +288,6 @@ public:
Instruction *visitFDiv(BinaryOperator &I);
Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
Instruction *visitAnd(BinaryOperator &I);
- Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
- Value *B, Value *C);
- Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
- Value *B, Value *C);
Instruction *visitOr(BinaryOperator &I);
Instruction *visitXor(BinaryOperator &I);
Instruction *visitShl(BinaryOperator &I);
@@ -595,6 +603,15 @@ private:
Instruction::BinaryOps, Value *, Value *, Value *,
Value *);
+ /// Match a select chain which produces one of three values based on whether
+ /// the LHS is less than, equal to, or greater than RHS respectively.
+ /// Return true if we matched a three-way compare idiom. The LHS, RHS, Less,
+ /// Equal and Greater values are saved in the matching process and returned to
+ /// the caller.
+ bool matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, Value *&RHS,
+ ConstantInt *&Less, ConstantInt *&Equal,
+ ConstantInt *&Greater);
+
/// \brief Attempts to replace V with a simpler value based on the demanded
/// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known,
@@ -672,6 +689,8 @@ private:
Instruction *foldICmpBinOp(ICmpInst &Cmp);
Instruction *foldICmpEquality(ICmpInst &Cmp);
+ Instruction *foldICmpSelectConstant(ICmpInst &Cmp, Instruction *Select,
+ ConstantInt *C);
Instruction *foldICmpTruncConstant(ICmpInst &Cmp, Instruction *Trunc,
const APInt *C);
Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index a4d84ae81aa02..ca370c73fca44 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -169,6 +169,18 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return nullptr;
}
+/// Returns true if V is dereferenceable for the size of the alloca.
+static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
+ const DataLayout &DL) {
+ if (AI->isArrayAllocation())
+ return false;
+ uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
+ if (!AllocaSize)
+ return false;
+ return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
+ APInt(64, AllocaSize), DL);
+}
+
static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// Check for array size of 1 (scalar allocation).
if (!AI.isArrayAllocation()) {
@@ -390,7 +402,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
- if (AI.getAlignment() <= SourceAlign) {
+ if (AI.getAlignment() <= SourceAlign &&
+ isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
@@ -476,21 +489,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
break;
case LLVMContext::MD_nonnull:
- // This only directly applies if the new type is also a pointer.
- if (NewTy->isPointerTy()) {
- NewLoad->setMetadata(ID, N);
- break;
- }
- // If it's integral now, translate it to !range metadata.
- if (NewTy->isIntegerTy()) {
- auto *ITy = cast<IntegerType>(NewTy);
- auto *NullInt = ConstantExpr::getPtrToInt(
- ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
- auto *NonNullInt =
- ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
- NewLoad->setMetadata(LLVMContext::MD_range,
- MDB.createRange(NonNullInt, NullInt));
- }
+ copyNonnullMetadata(LI, N, *NewLoad);
break;
case LLVMContext::MD_align:
case LLVMContext::MD_dereferenceable:
@@ -500,17 +499,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
NewLoad->setMetadata(ID, N);
break;
case LLVMContext::MD_range:
- // FIXME: It would be nice to propagate this in some way, but the type
- // conversions make it hard.
-
- // If it's a pointer now and the range does not contain 0, make it !nonnull.
- if (NewTy->isPointerTy()) {
- unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
- if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
- MDNode *NN = MDNode::get(LI.getContext(), None);
- NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
- }
- }
+ copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
break;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b9674d85634dc..33951e66497a1 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -303,7 +303,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
-/// (or (shl (and X, C1), C3), y)
+/// (or (shl (and X, C1), C3), Y)
/// iff:
/// C1 and C2 are both powers of 2
/// where:
@@ -317,19 +317,44 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
- if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
+ if (!IC || !SI.getType()->isIntegerTy())
return nullptr;
Value *CmpLHS = IC->getOperand(0);
Value *CmpRHS = IC->getOperand(1);
- if (!match(CmpRHS, m_Zero()))
- return nullptr;
+ Value *V;
+ unsigned C1Log;
+ bool IsEqualZero;
+ bool NeedAnd = false;
+ if (IC->isEquality()) {
+ if (!match(CmpRHS, m_Zero()))
+ return nullptr;
+
+ const APInt *C1;
+ if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1))))
+ return nullptr;
+
+ V = CmpLHS;
+ C1Log = C1->logBase2();
+ IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ;
+ } else if (IC->getPredicate() == ICmpInst::ICMP_SLT ||
+ IC->getPredicate() == ICmpInst::ICMP_SGT) {
+ // We also need to recognize (icmp slt (trunc (X)), 0) and
+ // (icmp sgt (trunc (X)), -1).
+ IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT;
+ if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) ||
+ (!IsEqualZero && !match(CmpRHS, m_Zero())))
+ return nullptr;
+
+ if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V)))))
+ return nullptr;
- Value *X;
- const APInt *C1;
- if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+ C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1;
+ NeedAnd = true;
+ } else {
return nullptr;
+ }
const APInt *C2;
bool OrOnTrueVal = false;
@@ -340,11 +365,27 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
if (!OrOnFalseVal && !OrOnTrueVal)
return nullptr;
- Value *V = CmpLHS;
Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
- unsigned C1Log = C1->logBase2();
unsigned C2Log = C2->logBase2();
+
+ bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal);
+ bool NeedShift = C1Log != C2Log;
+ bool NeedZExtTrunc = Y->getType()->getIntegerBitWidth() !=
+ V->getType()->getIntegerBitWidth();
+
+ // Make sure we don't create more instructions than we save.
+ Value *Or = OrOnFalseVal ? FalseVal : TrueVal;
+ if ((NeedShift + NeedXor + NeedZExtTrunc) >
+ (IC->hasOneUse() + Or->hasOneUse()))
+ return nullptr;
+
+ if (NeedAnd) {
+ // Insert the AND instruction on the input to the truncate.
+ APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log);
+ V = Builder->CreateAnd(V, ConstantInt::get(V->getType(), C1));
+ }
+
if (C2Log > C1Log) {
V = Builder->CreateZExtOrTrunc(V, Y->getType());
V = Builder->CreateShl(V, C2Log - C1Log);
@@ -354,9 +395,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
} else
V = Builder->CreateZExtOrTrunc(V, Y->getType());
- ICmpInst::Predicate Pred = IC->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
- (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ if (NeedXor)
V = Builder->CreateXor(V, *C2);
return Builder->CreateOr(V, Y);
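The select-to-shift fold this function performs can be checked with concrete power-of-two constants; a brute-force sketch with C1=4 (bit 2) and C2=16 (bit 4), so the shift amount C3 = C2Log - C1Log = 2:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 4, C2 = 16;
      for (uint32_t X = 0; X < 64; ++X)
        for (uint32_t Y = 0; Y < 64; ++Y) {
          // select (icmp eq (and X, C1), 0), Y, (or Y, C2)
          uint32_t Sel = ((X & C1) == 0) ? Y : (Y | C2);
          // or (shl (and X, C1), C3), Y
          uint32_t Folded = ((X & C1) << 2) | Y;
          assert(Sel == Folded);
        }
    }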
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 8cec865c6422a..1bb1a85367d1b 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -556,8 +556,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
// The inexact versions are deferred to DAGCombine, so we don't hide shl
// behind a bit mask.
const APInt *ShOp1;
- if (match(Op0, m_CombineOr(m_Exact(m_LShr(m_Value(X), m_APInt(ShOp1))),
- m_Exact(m_AShr(m_Value(X), m_APInt(ShOp1)))))) {
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1))))) {
unsigned ShrAmt = ShOp1->getZExtValue();
if (ShrAmt < ShAmt) {
// If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1)
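The exact-shift precondition is what makes this fold sound: "exact" means no set bits are shifted out, i.e. X is a multiple of 1 << C1. A quick standalone check for C1=2, C2=5:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C1 = 2, C2 = 5;
      for (uint32_t K = 0; K < 1024; ++K) {
        uint32_t X = K << C1; // guarantees the lshr below is exact
        // (X >>exact C1) << C2 --> X << (C2 - C1)
        assert(((X >> C1) << C2) == (X << (C2 - C1)));
      }
    }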
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 65e6d2e359052..02fac4fb37a4b 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -939,9 +939,19 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// `TrueVInPred`.
if (InC && !isa<ConstantExpr>(InC) && isa<ConstantInt>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
- else
+ else {
+ // Generate the select in the same block as PN's current incoming block.
+ // Note: ThisBB need not be the NonConstBB because vector constants
+ // which are constants by definition are handled here.
+ // FIXME: This can lead to an increase in IR generation because we might
+ // generate selects for vector constant phi operand, that could not be
+ // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
+ // non-vector phis, this transformation was always profitable because
+ // the select would be generated exactly once in the NonConstBB.
+ Builder->SetInsertPoint(ThisBB->getTerminator());
InV = Builder->CreateSelect(PN->getIncomingValue(i),
TrueVInPred, FalseVInPred, "phitmp");
+ }
NewPN->addIncoming(InV, ThisBB);
}
} else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
@@ -3002,6 +3012,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
++NumDeadInst;
DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
+ MadeIRChange = true;
continue;
}
@@ -3015,6 +3026,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
++NumConstProp;
if (isInstructionTriviallyDead(Inst, TLI))
Inst->eraseFromParent();
+ MadeIRChange = true;
continue;
}
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 0d308810009d5..4089d81ea3e1b 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -642,7 +642,12 @@ static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) {
if (DisableICP)
return false;
InstrProfSymtab Symtab;
- Symtab.create(M, InLTO);
+ if (Error E = Symtab.create(M, InLTO)) {
+ std::string SymtabFailure = toString(std::move(E));
+ DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n");
+ (void)SymtabFailure;
+ return false;
+ }
bool Changed = false;
for (auto &F : M) {
if (F.isDeclaration())
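The pattern the hunk above adopts, sketched outside LLVM: a fallible setup step returns an error object that the caller must stringify and consume before bailing out, instead of silently ignoring the failure. The types here are illustrative stand-ins, not the llvm::Error API:

    #include <cstdio>
    #include <optional>
    #include <string>

    struct MyError { std::string Msg; };

    std::optional<MyError> createSymtab(bool Fail) {
      if (Fail) return MyError{"bad symtab"};
      return std::nullopt; // success
    }

    bool promote(bool Fail) {
      if (auto E = createSymtab(Fail)) {
        std::fprintf(stderr, "Failed to create symtab: %s\n", E->Msg.c_str());
        return false; // bail out of the pass, as in the patch
      }
      return true;     // proceed with promotion
    }

    int main() { return promote(true) ? 0 : 1; }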
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 37f88d5f95f18..9c14b0149fdc1 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -19,12 +19,14 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -40,7 +42,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -92,6 +97,35 @@ cl::opt<double> NumCountersPerValueSite(
// is usually smaller than 2.
cl::init(1.0));
+cl::opt<bool> AtomicCounterUpdatePromoted(
+ "atomic-counter-update-promoted", cl::ZeroOrMore,
+ cl::desc("Do counter update using atomic fetch add "
+ " for promoted counters only"),
+ cl::init(false));
+
+// If the option is not specified, whether counter promotion is done by
+// default depends on how the instrumentation lowering pipeline is set
+// up, i.e., the default value of this option does not mean the promotion
+// will be done by default. Explicitly setting this option overrides
+// that behavior.
+cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
+ cl::desc("Do counter register promotion"),
+ cl::init(false));
+cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
+ cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10),
+ cl::desc("Max number counter promotions per loop to avoid"
+ " increasing register pressure too much"));
+
+// A debug option
+cl::opt<int>
+ MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
+ cl::desc("Max number of allowed counter promotions"));
+
+cl::opt<bool> SpeculativeCounterPromotion(
+ cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false),
+ cl::desc("Allow counter promotion for loops with multiple exiting blocks "
+ " or top-tested loops. "));
+
class InstrProfilingLegacyPass : public ModulePass {
InstrProfiling InstrProf;
@@ -116,6 +150,123 @@ public:
}
};
+/// A helper class to promote one counter RMW operation in the loop
+/// into a register update.
+///
+/// The RMW update of the counter will be sunk out of the loop after
+/// the transformation.
+///
+class PGOCounterPromoterHelper : public LoadAndStorePromoter {
+public:
+ PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA,
+ Value *Init, BasicBlock *PH,
+ ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<Instruction *> InsertPts)
+ : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
+ InsertPts(InsertPts) {
+ assert(isa<LoadInst>(L));
+ assert(isa<StoreInst>(S));
+ SSA.AddAvailableValue(PH, Init);
+ }
+ void doExtraRewritesBeforeFinalDeletion() const override {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Instruction *InsertPos = InsertPts[i];
+ // Get LiveIn value into the ExitBlock. If there are multiple
+ // predecessors, the value is defined by a PHI node in this
+ // block.
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
+ IRBuilder<> Builder(InsertPos);
+ if (AtomicCounterUpdatePromoted)
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
+ AtomicOrdering::SequentiallyConsistent);
+ else {
+ LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
+ auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
+ Builder.CreateStore(NewVal, Addr);
+ }
+ }
+ }
+
+private:
+ Instruction *Store;
+ ArrayRef<BasicBlock *> ExitBlocks;
+ ArrayRef<Instruction *> InsertPts;
+};
+
+/// A helper class to do register promotion for all profile counter
+/// updates in a loop.
+///
+class PGOCounterPromoter {
+public:
+ PGOCounterPromoter(ArrayRef<LoadStorePair> Cands, Loop &Loop)
+ : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) {
+
+ SmallVector<BasicBlock *, 8> LoopExitBlocks;
+ SmallPtrSet<BasicBlock *, 8> BlockSet;
+ ParentLoop.getExitBlocks(LoopExitBlocks);
+
+ for (BasicBlock *ExitBlock : LoopExitBlocks) {
+ if (BlockSet.insert(ExitBlock).second) {
+ ExitBlocks.push_back(ExitBlock);
+ InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ }
+ }
+ }
+
+ bool run(int64_t *NumPromoted) {
+ // We can't insert into a catchswitch.
+ bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
+ return isa<CatchSwitchInst>(Exit->getTerminator());
+ });
+
+ if (HasCatchSwitch)
+ return false;
+
+ if (!ParentLoop.hasDedicatedExits())
+ return false;
+
+ BasicBlock *PH = ParentLoop.getLoopPreheader();
+ if (!PH)
+ return false;
+
+ BasicBlock *H = ParentLoop.getHeader();
+ bool TopTested =
+ ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H));
+ if (!SpeculativeCounterPromotion &&
+ (TopTested || ParentLoop.getExitingBlock() == nullptr))
+ return false;
+
+ unsigned Promoted = 0;
+ for (auto &Cand : Candidates) {
+
+ SmallVector<PHINode *, 4> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+ Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+ PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
+ PH, ExitBlocks, InsertPts);
+ Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
+ Promoted++;
+ if (Promoted >= MaxNumOfPromotionsPerLoop)
+ break;
+ (*NumPromoted)++;
+ if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
+ break;
+ }
+
+ DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
+ << ParentLoop.getLoopDepth() << ")\n");
+ return Promoted != 0;
+ }
+
+private:
+ ArrayRef<LoadStorePair> Candidates;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+ Loop &ParentLoop;
+};
+
} // end anonymous namespace
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
@@ -147,6 +298,63 @@ static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
return dyn_cast<InstrProfIncrementInst>(Instr);
}
+bool InstrProfiling::lowerIntrinsics(Function *F) {
+ bool MadeChange = false;
+ PromotionCandidates.clear();
+ for (BasicBlock &BB : *F) {
+ for (auto I = BB.begin(), E = BB.end(); I != E;) {
+ auto Instr = I++;
+ InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+ if (Inc) {
+ lowerIncrement(Inc);
+ MadeChange = true;
+ } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+ lowerValueProfileInst(Ind);
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (!MadeChange)
+ return false;
+
+ promoteCounterLoadStores(F);
+ return true;
+}
+
+bool InstrProfiling::isCounterPromotionEnabled() const {
+ if (DoCounterPromotion.getNumOccurrences() > 0)
+ return DoCounterPromotion;
+
+ return Options.DoCounterPromotion;
+}
+
+void InstrProfiling::promoteCounterLoadStores(Function *F) {
+ if (!isCounterPromotionEnabled())
+ return;
+
+ DominatorTree DT(*F);
+ LoopInfo LI(DT);
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
+
+ for (const auto &LoadStore : PromotionCandidates) {
+ auto *CounterLoad = LoadStore.first;
+ auto *CounterStore = LoadStore.second;
+ BasicBlock *BB = CounterLoad->getParent();
+ Loop *ParentLoop = LI.getLoopFor(BB);
+ if (!ParentLoop)
+ continue;
+ LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
+ }
+
+ SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
+
+ for (auto *Loop : Loops) {
+ PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop);
+ Promoter.run(&TotalCountersPromoted);
+ }
+}
+
bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
bool MadeChange = false;
@@ -179,18 +387,7 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
}
for (Function &F : M)
- for (BasicBlock &BB : F)
- for (auto I = BB.begin(), E = BB.end(); I != E;) {
- auto Instr = I++;
- InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
- if (Inc) {
- lowerIncrement(Inc);
- MadeChange = true;
- } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
- lowerValueProfileInst(Ind);
- MadeChange = true;
- }
- }
+ MadeChange |= lowerIntrinsics(&F);
if (GlobalVariable *CoverageNamesVar =
M.getNamedGlobal(getCoverageUnusedNamesVarName())) {
@@ -303,9 +500,12 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
- Value *Count = Builder.CreateLoad(Addr, "pgocount");
- Count = Builder.CreateAdd(Count, Inc->getStep());
- Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
+ Value *Load = Builder.CreateLoad(Addr, "pgocount");
+ auto *Count = Builder.CreateAdd(Load, Inc->getStep());
+ auto *Store = Builder.CreateStore(Count, Addr);
+ Inc->replaceAllUsesWith(Store);
+ if (isCounterPromotionEnabled())
+ PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
Inc->eraseFromParent();
}
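What the PGOCounterPromoter machinery above accomplishes, shown on plain C++ rather than IR: the per-iteration memory RMW of a profile counter becomes a register accumulation that is added back once per loop exit. Illustrative only:

    #include <cassert>
    #include <cstdint>

    uint64_t Counter = 0; // stands in for a profile counter in memory

    void unpromoted(int N) {
      for (int I = 0; I < N; ++I)
        Counter += 1;             // load/add/store on every iteration
    }

    void promoted(int N) {
      uint64_t Local = 0;         // SSA value seeded with the live-in count
      for (int I = 0; I < N; ++I)
        Local += 1;               // register update only
      Counter += Local;           // single RMW in the loop exit block
    }

    int main() {
      unpromoted(10);
      assert(Counter == 10);
      promoted(10);
      assert(Counter == 20);
    }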
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index b2d95271479c3..0e7d11c553977 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1177,7 +1177,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
{llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncHash), Builder.CreatePtrToInt(Length, Int64Ty),
+ Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty),
Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});
++CurCtrId;
}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index f5196cc461815..457c9427ab9ac 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_library(LLVMScalarOpts
LICM.cpp
LoopAccessAnalysisPrinter.cpp
LoopSink.cpp
- LoadCombine.cpp
LoopDeletion.cpp
LoopDataPrefetch.cpp
LoopDistribute.cpp
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 2a4c9526dfcd9..28157783daa7a 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -232,8 +232,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
if (PB == PE) return false;
- // Analyse each switch case in turn. This is done in reverse order so that
- // removing a case doesn't cause trouble for the iteration.
+ // Analyse each switch case in turn.
bool Changed = false;
for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
@@ -291,7 +290,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
break;
}
- // Increment the case iterator sense we didn't delete it.
+ // Increment the case iterator since we didn't delete it.
++CI;
}
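The iteration discipline the comment fix above describes: advance the iterator only when the current element survives, and let deletion yield the successor. A sketch with a std::map standing in for the switch's case list:

    #include <cassert>
    #include <map>

    int main() {
      std::map<int, int> Cases{{1, 10}, {2, 20}, {3, 30}};
      for (auto CI = Cases.begin(), CE = Cases.end(); CI != CE;) {
        if (CI->first == 2) {
          CI = Cases.erase(CI); // erase returns the next position
          continue;
        }
        ++CI;                   // increment since we didn't delete it
      }
      assert(Cases.size() == 2 && !Cases.count(2));
    }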
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0490d93f64553..c0f628eb61e61 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
struct llvm::GVN::Expression {
uint32_t opcode;
Type *type;
+ bool commutative;
SmallVector<uint32_t, 4> varargs;
- Expression(uint32_t o = ~2U) : opcode(o) {}
+ Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
@@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
+ e.commutative = true;
}
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (C->getOpcode() << 8) | Predicate;
+ e.commutative = true;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (Opcode << 8) | Predicate;
+ e.commutative = true;
return e;
}
@@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
+ if (PHINode *PN = dyn_cast<PHINode>(V))
+ NumberingPhi[num] = PN;
}
uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) {
- e = nextValueNumber++;
- valueNumbering[C] = e;
- return e;
+ auto ValNum = assignExpNewValueNum(exp);
+ if (ValNum.second) {
+ valueNumbering[C] = ValNum.first;
+ return ValNum.first;
}
if (!MD) {
- e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
}
@@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
+ case Instruction::PHI:
+ valueNumbering[V] = nextValueNumber;
+ NumberingPhi[nextValueNumber] = cast<PHINode>(V);
+ return nextValueNumber++;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[V] = e;
return e;
}
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V) const {
+uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
- assert(VI != valueNumbering.end() && "Value not numbered?");
- return VI->second;
+ if (Verify) {
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+ }
+ return (VI != valueNumbering.end()) ? VI->second : 0;
}
/// Returns the value number of the given comparison,
@@ -549,21 +559,28 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
- return e;
+ return assignExpNewValueNum(exp).first;
}
/// Remove all entries from the ValueTable.
void GVN::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
+ NumberingPhi.clear();
+ PhiTranslateTable.clear();
nextValueNumber = 1;
+ Expressions.clear();
+ ExprIdx.clear();
+ nextExprNumber = 0;
}
/// Remove a value from the value numbering.
void GVN::ValueTable::erase(Value *V) {
+ uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
+ // If V is a PHINode, the V <--> value number mapping is one-to-one.
+ if (isa<PHINode>(V))
+ NumberingPhi.erase(Num);
}
/// verifyRemoved - Verify that the value is removed from all internal data
@@ -602,7 +619,7 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
E = d.end(); I != E; ++I) {
@@ -1451,6 +1468,95 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
+/// Return a pair whose first field is the value number of \p Exp and whose
+/// second field indicates whether that value number was newly created.
+std::pair<uint32_t, bool>
+GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+ uint32_t &e = expressionNumbering[Exp];
+ bool CreateNewValNum = !e;
+ if (CreateNewValNum) {
+ Expressions.push_back(Exp);
+ if (ExprIdx.size() < nextValueNumber + 1)
+ ExprIdx.resize(nextValueNumber * 2);
+ e = nextValueNumber;
+ ExprIdx[nextValueNumber++] = nextExprNumber++;
+ }
+ return {e, CreateNewValNum};
+}
+
+/// Return whether all the values related to the same \p Num are
+/// defined in \p BB.
+bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+ GVN &Gvn) {
+ LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
+ while (Vals && Vals->BB == BB)
+ Vals = Vals->Next;
+ return !Vals;
+}
+
+/// Wrap phiTranslateImpl to provide caching functionality.
+uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock, uint32_t Num,
+ GVN &Gvn) {
+ auto FindRes = PhiTranslateTable.find({Num, Pred});
+ if (FindRes != PhiTranslateTable.end())
+ return FindRes->second;
+ uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
+ PhiTranslateTable.insert({{Num, Pred}, NewNum});
+ return NewNum;
+}
+
+/// Translate value number \p Num using phis, so that it has the values of
+/// the phis in \p PhiBlock.
+uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVN &Gvn) {
+ if (PHINode *PN = NumberingPhi[Num]) {
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
+ if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
+ return TransVal;
+ }
+ return Num;
+ }
+
+ // If any value related to Num is defined in a BB other than PhiBlock, it
+ // cannot depend on a phi in PhiBlock without going through a backedge, so
+ // we can exit early in that case to save compile time.
+ if (!areAllValsInBB(Num, PhiBlock, Gvn))
+ return Num;
+
+ if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
+ return Num;
+ Expression Exp = Expressions[ExprIdx[Num]];
+
+ for (unsigned i = 0; i < Exp.varargs.size(); i++) {
+ // For InsertValue and ExtractValue, some varargs are index numbers
+ // instead of value numbers. Those index numbers should not be
+ // translated.
+ if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
+ (i > 0 && Exp.opcode == Instruction::ExtractValue))
+ continue;
+ Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
+ }
+
+ if (Exp.commutative) {
+ assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
+ if (Exp.varargs[0] > Exp.varargs[1]) {
+ std::swap(Exp.varargs[0], Exp.varargs[1]);
+ uint32_t Opcode = Exp.opcode >> 8;
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
+ Exp.opcode = (Opcode << 8) |
+ CmpInst::getSwappedPredicate(
+ static_cast<CmpInst::Predicate>(Exp.opcode & 255));
+ }
+ }
+
+ if (uint32_t NewNum = expressionNumbering[Exp])
+ return NewNum;
+ return Num;
+}
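To make the translation scheme above concrete, here is a minimal standalone sketch of the same idea: a cached phi-translation over plain integer value numbers. The names and containers are hypothetical stand-ins (std::map instead of the pass's DenseMap, an explicit predecessor-to-value table instead of PHINode), not LLVM's API:

#include <cstdint>
#include <map>
#include <utility>

// Hypothetical stand-ins for the pass's tables: value numbers are plain
// integers, and a "phi" maps each predecessor block id to the value number
// of its incoming value.
using BlockId = int;
using PhiMap = std::map<BlockId, uint32_t>;

std::map<uint32_t, PhiMap> NumberingPhi;                        // num -> phi
std::map<std::pair<uint32_t, BlockId>, uint32_t> TranslateCache;

uint32_t phiTranslateImpl(BlockId Pred, uint32_t Num) {
  auto It = NumberingPhi.find(Num);
  if (It == NumberingPhi.end())
    return Num;                    // not a phi: nothing to translate
  auto In = It->second.find(Pred);
  return In != It->second.end() ? In->second : Num;
}

// Cache wrapper, mirroring phiTranslate(): each (Num, Pred) pair is
// translated at most once.
uint32_t phiTranslate(BlockId Pred, uint32_t Num) {
  auto Key = std::make_pair(Num, Pred);
  auto Found = TranslateCache.find(Key);
  if (Found != TranslateCache.end())
    return Found->second;
  uint32_t NewNum = phiTranslateImpl(Pred, Num);
  TranslateCache.emplace(Key, NewNum);
  return NewNum;
}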
+
// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
@@ -1495,6 +1601,15 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
+
+void GVN::assignBlockRPONumber(Function &F) {
+ uint32_t NextBlockNumber = 1;
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ for (BasicBlock *BB : RPOT)
+ BlockRPONumber[BB] = NextBlockNumber++;
+}
+
+
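A brief note on why this numbering suffices for the backedge test used later in performScalarPRE: in a reverse post-order numbering, every forward edge goes from a lower number to a higher one, so an edge whose source number is greater than or equal to its destination's must close a cycle. A tiny hedged sketch, with Block as a hypothetical stand-in for BasicBlock:

#include <cstdint>
#include <map>

struct Block; // hypothetical stand-in for llvm::BasicBlock

std::map<const Block *, uint32_t> BlockRPONumber;

// True if the edge Pred -> Succ cannot appear in a forward walk of the
// CFG, i.e. it closes a cycle. PRE bails out on such edges when the
// candidate instruction has an operand defined in Succ.
bool isBackedgeLike(const Block *Pred, const Block *Succ) {
  return BlockRPONumber[Pred] >= BlockRPONumber[Succ];
}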
// Tries to replace instruction with const, using information from
// ReplaceWithConstMap.
bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
@@ -1856,6 +1971,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
+ assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -1927,7 +2043,7 @@ bool GVN::processBlock(BasicBlock *BB) {
// Instantiate an expression in a predecessor that lacked it.
bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
- unsigned int ValNo) {
+ BasicBlock *Curr, unsigned int ValNo) {
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
@@ -1945,7 +2061,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
success = false;
break;
}
- if (Value *V = findLeader(Pred, VN.lookup(Op))) {
+ uint32_t TValNo =
+ VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
+ if (Value *V = findLeader(Pred, TValNo)) {
Instr->setOperand(i, V);
} else {
success = false;
@@ -1962,10 +2080,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Instr->insertBefore(Pred->getTerminator());
Instr->setName(Instr->getName() + ".pre");
Instr->setDebugLoc(Instr->getDebugLoc());
- VN.add(Instr, ValNo);
+
+ unsigned Num = VN.lookupOrAdd(Instr);
+ VN.add(Instr, Num);
// Update the availability map to include the new instruction.
- addToLeaderTable(ValNo, Instr, Pred);
+ addToLeaderTable(Num, Instr, Pred);
return true;
}
@@ -2003,18 +2123,27 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
for (BasicBlock *P : predecessors(CurrentBlock)) {
- // We're not interested in PRE where the block is its
- // own predecessor, or in blocks with predecessors
- // that are not reachable.
- if (P == CurrentBlock) {
+ // We're not interested in PRE where the block has predecessors that are
+ // not reachable.
+ if (!DT->isReachableFromEntry(P)) {
NumWithout = 2;
break;
- } else if (!DT->isReachableFromEntry(P)) {
+ }
+ // It is not safe to do PRE when P->CurrentBlock is a loop backedge and
+ // CurInst has an operand defined in CurrentBlock (so it may be defined
+ // by a phi in the loop header).
+ if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
+ any_of(CurInst->operands(), [&](const Use &U) {
+ if (auto *Inst = dyn_cast<Instruction>(U.get()))
+ return Inst->getParent() == CurrentBlock;
+ return false;
+ })) {
NumWithout = 2;
break;
}
- Value *predV = findLeader(P, ValNo);
+ uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
+ Value *predV = findLeader(P, TValNo);
if (!predV) {
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
@@ -2054,7 +2183,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
}
// We need to insert somewhere, so let's give it a shot
PREInstr = CurInst->clone();
- if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
+ if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
// If we failed insertion, make sure we remove the instruction.
DEBUG(verifyRemoved(PREInstr));
PREInstr->deleteValue();
@@ -2168,6 +2297,7 @@ bool GVN::iterateOnFunction(Function &F) {
void GVN::cleanupGlobalSets() {
VN.clear();
LeaderTable.clear();
+ BlockRPONumber.clear();
TableAllocator.Reset();
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index c120036464d0a..05293eb0079fc 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -25,6 +25,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -576,7 +577,12 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// Handle compare with phi operand, where the PHI is defined in this block.
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
assert(Preference == WantInteger && "Compares only produce integers");
- PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ Type *CmpType = Cmp->getType();
+ Value *CmpLHS = Cmp->getOperand(0);
+ Value *CmpRHS = Cmp->getOperand(1);
+ CmpInst::Predicate Pred = Cmp->getPredicate();
+
+ PHINode *PN = dyn_cast<PHINode>(CmpLHS);
if (PN && PN->getParent() == BB) {
const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
@@ -584,15 +590,15 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
Value *LHS = PN->getIncomingValue(i);
- Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+ Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB);
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL});
+ Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
if (!Res) {
if (!isa<Constant>(RHS))
continue;
LazyValueInfo::Tristate
- ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ ResT = LVI->getPredicateOnEdge(Pred, LHS,
cast<Constant>(RHS), PredBB, BB,
CxtI ? CxtI : Cmp);
if (ResT == LazyValueInfo::Unknown)
@@ -609,27 +615,67 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
- if (isa<Constant>(Cmp->getOperand(1)) && !Cmp->getType()->isVectorTy()) {
- Constant *CmpConst = cast<Constant>(Cmp->getOperand(1));
+ if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
+ Constant *CmpConst = cast<Constant>(CmpRHS);
- if (!isa<Instruction>(Cmp->getOperand(0)) ||
- cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ if (!isa<Instruction>(CmpLHS) ||
+ cast<Instruction>(CmpLHS)->getParent() != BB) {
for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate Res =
- LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ LVI->getPredicateOnEdge(Pred, CmpLHS,
CmpConst, P, BB, CxtI ? CxtI : Cmp);
if (Res == LazyValueInfo::Unknown)
continue;
- Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Constant *ResC = ConstantInt::get(CmpType, Res);
Result.push_back(std::make_pair(ResC, P));
}
return !Result.empty();
}
+ // InstCombine can fold some forms of constant range checks into
+ // (icmp (add (x, C1)), C2). See if we have such a thing with x as a
+ // live-in.
+ {
+ using namespace PatternMatch;
+ Value *AddLHS;
+ ConstantInt *AddConst;
+ if (isa<ConstantInt>(CmpConst) &&
+ match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
+ if (!isa<Instruction>(AddLHS) ||
+ cast<Instruction>(AddLHS)->getParent() != BB) {
+ for (BasicBlock *P : predecessors(BB)) {
+ // If the value is known by LazyValueInfo to be a ConstantRange in
+ // a predecessor, use that information to try to thread this
+ // block.
+ ConstantRange CR = LVI->getConstantRangeOnEdge(
+ AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
+ // Propagate the range through the addition.
+ CR = CR.add(AddConst->getValue());
+
+ // Get the range where the compare returns true.
+ ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
+ Pred, cast<ConstantInt>(CmpConst)->getValue());
+
+ Constant *ResC;
+ if (CmpRange.contains(CR))
+ ResC = ConstantInt::getTrue(CmpType);
+ else if (CmpRange.inverse().contains(CR))
+ ResC = ConstantInt::getFalse(CmpType);
+ else
+ continue;
+
+ Result.push_back(std::make_pair(ResC, P));
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
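The new block threads on ranges rather than single constants. As a concrete worked case (the numbers here are illustrative, not taken from a test): if LVI proves x is in [0, 10) on an edge and the IR computes icmp ult (add x, 5), 20, then the propagated range is [5, 15), the region where the compare is true is [0, 20), and since [0, 20) contains [5, 15) the compare folds to true on that edge. The same algebra in a standalone sketch, with a toy half-open range standing in for llvm::ConstantRange (no wraparound handling):

#include <cassert>
#include <cstdint>

// Toy half-open unsigned range [Lo, Hi); llvm::ConstantRange also handles
// wrapping ranges, which this sketch ignores.
struct Range {
  uint32_t Lo, Hi;
  bool contains(Range R) const { return Lo <= R.Lo && R.Hi <= Hi; }
  Range add(uint32_t C) const { return {Lo + C, Hi + C}; }
};

int main() {
  Range X{0, 10};                // LVI: x in [0, 10) on the edge
  Range CR = X.add(5);           // propagate through "add x, 5" -> [5, 15)
  Range CmpTrue{0, 20};          // "icmp ult y, 20" is true for y in [0, 20)
  assert(CmpTrue.contains(CR));  // the compare folds to true on this edge
  return 0;
}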
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
PredValueInfoTy LHSVals;
@@ -638,8 +684,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
- Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
- V, CmpConst);
+ Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
Result.push_back(std::make_pair(KC, LHSVal.second));
}
@@ -752,6 +797,37 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
+ // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
+ // BB code within one basic block `BB`), we need to invalidate the LVI
+ // information associated with BB, because the LVI information need not be
+ // true for all of BB after the merge. For example,
+ // Before the merge, LVI info and code is as follows:
+ // SinglePred: <LVI info1 for %p val>
+ // %y = use of %p
+ // call @exit() // need not transfer execution to successor.
+ // assume(%p) // from this point on %p is true
+ // br label %BB
+ // BB: <LVI info2 for %p val, i.e. %p is true>
+ // %x = use of %p
+ // br label exit
+ //
+ // Note that the LVI info for blocks BB and SinglePred is correct for %p
+ // (info2 and info1 respectively). After the merge and the deletion of
+ // LVI info1 for SinglePred, we have the following code:
+ // BB: <LVI info2 for %p val>
+ // %y = use of %p
+ // call @exit()
+ // assume(%p)
+ // %x = use of %p <-- LVI info2 is correct from here onwards.
+ // br label exit
+ // LVI info2 for BB is incorrect at the beginning of BB.
+
+ // Invalidate LVI information for BB if the LVI is not provably true for
+ // all of BB.
+ if (any_of(*BB, [](Instruction &I) {
+ return !isGuaranteedToTransferExecutionToSuccessor(&I);
+ }))
+ LVI->eraseBlock(BB);
return true;
}
}
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
deleted file mode 100644
index 025ba1bfedc18..0000000000000
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This transformation combines adjacent loads.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "load-combine"
-
-STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
-STATISTIC(NumLoadsCombined, "Number of loads combined");
-
-#define LDCOMBINE_NAME "Combine Adjacent Loads"
-
-namespace {
-struct PointerOffsetPair {
- Value *Pointer;
- APInt Offset;
-};
-
-struct LoadPOPPair {
- LoadInst *Load;
- PointerOffsetPair POP;
- /// \brief The new load needs to be created before the first load in IR order.
- unsigned InsertOrder;
-};
-
-class LoadCombine : public BasicBlockPass {
- LLVMContext *C;
- AliasAnalysis *AA;
- DominatorTree *DT;
-
-public:
- LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
- initializeLoadCombinePass(*PassRegistry::getPassRegistry());
- }
-
- using llvm::Pass::doInitialization;
- bool doInitialization(Function &) override;
- bool runOnBasicBlock(BasicBlock &BB) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-
- StringRef getPassName() const override { return LDCOMBINE_NAME; }
- static char ID;
-
- typedef IRBuilder<TargetFolder> BuilderTy;
-
-private:
- BuilderTy *Builder;
-
- PointerOffsetPair getPointerOffsetPair(LoadInst &);
- bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
- bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
- bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
-};
-}
-
-bool LoadCombine::doInitialization(Function &F) {
- DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
- C = &F.getContext();
- return true;
-}
-
-PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
- auto &DL = LI.getModule()->getDataLayout();
-
- PointerOffsetPair POP;
- POP.Pointer = LI.getPointerOperand();
- unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
- POP.Offset = APInt(BitWidth, 0);
-
- while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
- if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
- APInt LastOffset = POP.Offset;
- if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
- // Can't handle GEPs with variable indices.
- POP.Offset = LastOffset;
- return POP;
- }
- POP.Pointer = GEP->getPointerOperand();
- } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
- POP.Pointer = BC->getOperand(0);
- }
- }
- return POP;
-}
-
-bool LoadCombine::combineLoads(
- DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
- bool Combined = false;
- for (auto &Loads : LoadMap) {
- if (Loads.second.size() < 2)
- continue;
- std::sort(Loads.second.begin(), Loads.second.end(),
- [](const LoadPOPPair &A, const LoadPOPPair &B) {
- return A.POP.Offset.slt(B.POP.Offset);
- });
- if (aggregateLoads(Loads.second))
- Combined = true;
- }
- return Combined;
-}
-
-/// \brief Try to aggregate loads from a sorted list of loads to be combined.
-///
-/// It is guaranteed that no writes occur between any of the loads. All loads
-/// have the same base pointer. There are at least two loads.
-bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
- assert(Loads.size() >= 2 && "Insufficient loads!");
- LoadInst *BaseLoad = nullptr;
- SmallVector<LoadPOPPair, 8> AggregateLoads;
- bool Combined = false;
- bool ValidPrevOffset = false;
- APInt PrevOffset;
- uint64_t PrevSize = 0;
- for (auto &L : Loads) {
- if (ValidPrevOffset == false) {
- BaseLoad = L.Load;
- PrevOffset = L.POP.Offset;
- PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
- L.Load->getType());
- AggregateLoads.push_back(L);
- ValidPrevOffset = true;
- continue;
- }
- if (L.Load->getAlignment() > BaseLoad->getAlignment())
- continue;
- APInt PrevEnd = PrevOffset + PrevSize;
- if (L.POP.Offset.sgt(PrevEnd)) {
- // No other load will be combinable
- if (combineLoads(AggregateLoads))
- Combined = true;
- AggregateLoads.clear();
- ValidPrevOffset = false;
- continue;
- }
- if (L.POP.Offset != PrevEnd)
- // This load is offset less than the size of the last load.
- // FIXME: We may want to handle this case.
- continue;
- PrevOffset = L.POP.Offset;
- PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
- L.Load->getType());
- AggregateLoads.push_back(L);
- }
- if (combineLoads(AggregateLoads))
- Combined = true;
- return Combined;
-}
-
-/// \brief Given a list of combinable load. Combine the maximum number of them.
-bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
- // Remove loads from the end while the size is not a power of 2.
- unsigned TotalSize = 0;
- for (const auto &L : Loads)
- TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
- while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
- TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
- if (Loads.size() < 2)
- return false;
-
- DEBUG({
- dbgs() << "***** Combining Loads ******\n";
- for (const auto &L : Loads) {
- dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
- }
- });
-
- // Find first load. This is where we put the new load.
- LoadPOPPair FirstLP;
- FirstLP.InsertOrder = -1u;
- for (const auto &L : Loads)
- if (L.InsertOrder < FirstLP.InsertOrder)
- FirstLP = L;
-
- unsigned AddressSpace =
- FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
-
- Builder->SetInsertPoint(FirstLP.Load);
- Value *Ptr = Builder->CreateConstGEP1_64(
- Builder->CreatePointerCast(Loads[0].POP.Pointer,
- Builder->getInt8PtrTy(AddressSpace)),
- Loads[0].POP.Offset.getSExtValue());
- LoadInst *NewLoad = new LoadInst(
- Builder->CreatePointerCast(
- Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
- Ptr->getType()->getPointerAddressSpace())),
- Twine(Loads[0].Load->getName()) + ".combined", false,
- Loads[0].Load->getAlignment(), FirstLP.Load);
-
- for (const auto &L : Loads) {
- Builder->SetInsertPoint(L.Load);
- Value *V = Builder->CreateExtractInteger(
- L.Load->getModule()->getDataLayout(), NewLoad,
- cast<IntegerType>(L.Load->getType()),
- (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract");
- L.Load->replaceAllUsesWith(V);
- }
-
- NumLoadsCombined += Loads.size();
- return true;
-}
-
-bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
- if (skipBasicBlock(BB))
- return false;
-
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- // Skip analysing dead blocks (not forward reachable from function entry).
- if (!DT->isReachableFromEntry(&BB)) {
- DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() <<
- " in " << BB.getParent()->getName() << "\n");
- return false;
- }
-
- IRBuilder<TargetFolder> TheBuilder(
- BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
- Builder = &TheBuilder;
-
- DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
- AliasSetTracker AST(*AA);
-
- bool Combined = false;
- unsigned Index = 0;
- for (auto &I : BB) {
- if (I.mayThrow() || AST.containsUnknown(&I)) {
- if (combineLoads(LoadMap))
- Combined = true;
- LoadMap.clear();
- AST.clear();
- continue;
- }
- if (I.mayWriteToMemory()) {
- AST.add(&I);
- continue;
- }
- LoadInst *LI = dyn_cast<LoadInst>(&I);
- if (!LI)
- continue;
- ++NumLoadsAnalyzed;
- if (!LI->isSimple() || !LI->getType()->isIntegerTy())
- continue;
- auto POP = getPointerOffsetPair(*LI);
- if (!POP.Pointer)
- continue;
- LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++});
- AST.add(LI);
- }
- if (combineLoads(LoadMap))
- Combined = true;
- return Combined;
-}
-
-char LoadCombine::ID = 0;
-
-BasicBlockPass *llvm::createLoadCombinePass() {
- return new LoadCombine();
-}
-
-INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 3151ccd279c41..c41cc42db5e2c 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -31,20 +31,19 @@ using namespace llvm;
STATISTIC(NumDeleted, "Number of loops deleted");
/// This function deletes dead loops. The caller of this function needs to
-/// guarantee that the loop is infact dead. Here we handle two kinds of dead
+/// guarantee that the loop is in fact dead. Here we handle two kinds of dead
/// loop. The first kind (\p isLoopDead) is where only invariant values from
/// within the loop are used outside of it. The second kind (\p
/// isLoopNeverExecuted) is where the loop is provably never executed. We can
-/// always remove never executed loops since they will not cause any
-/// difference to program behaviour.
+/// always remove never executed loops since they will not cause any difference
+/// to program behaviour.
///
/// This also updates the relevant analysis information in \p DT, \p SE, and \p
/// LI. It also updates the loop PM if an updater struct is provided.
// TODO: This function will be used by loop-simplifyCFG as well. So, move this
// to LoopUtils.cpp
static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
- LoopInfo &LI, bool LoopIsNeverExecuted,
- LPMUpdater *Updater = nullptr);
+ LoopInfo &LI, LPMUpdater *Updater = nullptr);
/// Determines if a loop is dead.
///
/// This assumes that we've already checked for unique exit and exiting blocks,
@@ -168,7 +167,14 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
BasicBlock *ExitBlock = L->getUniqueExitBlock();
if (ExitBlock && isLoopNeverExecuted(L)) {
- deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater);
+ // Set the incoming values to undef for the phi nodes in the exit block.
+ BasicBlock::iterator BI = ExitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ for (unsigned i = 0; i < P->getNumIncomingValues(); i++)
+ P->setIncomingValue(i, UndefValue::get(P->getType()));
+ BI++;
+ }
+ deleteDeadLoop(L, DT, SE, LI, Updater);
++NumDeleted;
return true;
}
@@ -196,15 +202,14 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
if (isa<SCEVCouldNotCompute>(S))
return Changed;
- deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater);
+ deleteDeadLoop(L, DT, SE, LI, Updater);
++NumDeleted;
return true;
}
static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
- LoopInfo &LI, bool LoopIsNeverExecuted,
- LPMUpdater *Updater) {
+ LoopInfo &LI, LPMUpdater *Updater) {
assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
auto *Preheader = L->getLoopPreheader();
assert(Preheader && "Preheader should exist!");
@@ -227,6 +232,8 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
auto *ExitBlock = L->getUniqueExitBlock();
assert(ExitBlock && "Should have a unique exit block!");
+ assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
// Connect the preheader directly to the exit block.
// Even when the loop is never executed, we cannot remove the edge from the
// source block to the exit block. Consider the case where the unexecuted loop
@@ -236,20 +243,28 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
// non-loop, it will be deleted in a future iteration of loop deletion pass.
Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock);
- SmallVector<BasicBlock *, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
// Rewrite phis in the exit block to get their inputs from the Preheader
// instead of the exiting block.
- BasicBlock *ExitingBlock = ExitingBlocks[0];
BasicBlock::iterator BI = ExitBlock->begin();
while (PHINode *P = dyn_cast<PHINode>(BI)) {
- int j = P->getBasicBlockIndex(ExitingBlock);
- assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
- if (LoopIsNeverExecuted)
- P->setIncomingValue(j, UndefValue::get(P->getType()));
- P->setIncomingBlock(j, Preheader);
- for (unsigned i = 1; i < ExitingBlocks.size(); ++i)
- P->removeIncomingValue(ExitingBlocks[i]);
+ // Set the zeroth element of Phi to be from the preheader and remove all
+ // other incoming values. Given the loop has dedicated exits, all other
+ // incoming values must be from the exiting blocks.
+ int PredIndex = 0;
+ P->setIncomingBlock(PredIndex, Preheader);
+ // Remove all incoming values from all other exiting blocks (including
+ // duplicate values from an exiting block).
+ // Nuke all entries except the zeroth entry, which is the preheader entry.
+ // NOTE! We need to remove the incoming values in reverse order, as done
+ // below, to keep the indices valid for deletion (removeIncomingValue
+ // updates getNumIncomingValues and shifts all values down into the operand
+ // being deleted).
+ for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
+ P->removeIncomingValue(e-i, false);
+
+ assert((P->getNumIncomingValues() == 1 &&
+ P->getIncomingBlock(PredIndex) == Preheader) &&
+ "Should have exactly one value and that's from the preheader!");
++BI;
}
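The reverse-order note matters because each removal shifts later entries down. A standalone sketch of the same idiom on a plain vector (hypothetical, not PHINode's API), deleting indices e, e-1, ..., 1 so that index 0, the preheader entry, survives:

#include <cassert>
#include <vector>

int main() {
  // Entry 0 plays the role of the preheader value; the rest must go.
  std::vector<int> Incoming{42, 1, 2, 3};

  // Erasing from the back first keeps the remaining indices valid, exactly
  // as the loop over removeIncomingValue above does with e - i.
  for (unsigned i = 0, e = Incoming.size() - 1; i != e; ++i)
    Incoming.erase(Incoming.begin() + (e - i));

  assert(Incoming.size() == 1 && Incoming[0] == 42);
  return 0;
}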
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b027278b24f2e..73436f13c94e4 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -131,7 +131,7 @@ static cl::opt<bool> EnablePhiElim(
// The flag adds instruction count to solutions cost comparision.
static cl::opt<bool> InsnsCost(
- "lsr-insns-cost", cl::Hidden, cl::init(true),
+ "lsr-insns-cost", cl::Hidden, cl::init(false),
cl::desc("Add instruction count to a LSR cost model"));
// Flag to choose how to narrow complex lsr solution
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index cbbd55512c9f5..7a7624f775429 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -1244,27 +1244,24 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
// only do this for simple stores, we should expand to cover memcpys, etc.
const auto *LastStore = createStoreExpression(SI, StoreRHS);
const auto *LastCC = ExpressionToClass.lookup(LastStore);
- // Basically, check if the congruence class the store is in is defined by a
- // store that isn't us, and has the same value. MemorySSA takes care of
- // ensuring the store has the same memory state as us already.
- // The RepStoredValue gets nulled if all the stores disappear in a class, so
- // we don't need to check if the class contains a store besides us.
- if (LastCC &&
- LastCC->getStoredValue() == lookupOperandLeader(SI->getValueOperand()))
+ // We really want to check whether the expression we matched was a store. No
+ // easy way to do that. However, we can check that the class we found has a
+ // store, which, assuming the value numbering state is not corrupt, is
+ // sufficient, because we must also be equivalent to that store's expression
+ // for it to be in the same class as the load.
+ if (LastCC && LastCC->getStoredValue() == LastStore->getStoredValue())
return LastStore;
- deleteExpression(LastStore);
// Also check if our value operand is defined by a load of the same memory
// location, and the memory state is the same as it was then (otherwise, it
// could have been overwritten later. See test32 in
// transforms/DeadStoreElimination/simple.ll).
- if (auto *LI =
- dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
+ if (auto *LI = dyn_cast<LoadInst>(LastStore->getStoredValue()))
if ((lookupOperandLeader(LI->getPointerOperand()) ==
- lookupOperandLeader(SI->getPointerOperand())) &&
+ LastStore->getOperand(0)) &&
(lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
StoreRHS))
- return createStoreExpression(SI, StoreRHS);
- }
+ return LastStore;
+ deleteExpression(LastStore);
}
// If the store is not equivalent to anything, value number it as a store that
@@ -2332,9 +2329,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// see if we know some constant value for it already.
Value *NewGVN::findConditionEquivalence(Value *Cond) const {
auto Result = lookupOperandLeader(Cond);
- if (isa<Constant>(Result))
- return Result;
- return nullptr;
+ return isa<Constant>(Result) ? Result : nullptr;
}
// Process the outgoing edges of a block for reachability.
@@ -3014,14 +3009,27 @@ void NewGVN::verifyIterationSettled(Function &F) {
// a no-longer valid StoreExpression.
void NewGVN::verifyStoreExpressions() const {
#ifndef NDEBUG
- DenseSet<std::pair<const Value *, const Value *>> StoreExpressionSet;
+ // This is the only use of this, and it's not worth defining a complicated
+ // DenseMapInfo hash/equality function for it.
+ std::set<
+ std::pair<const Value *,
+ std::tuple<const Value *, const CongruenceClass *, Value *>>>
+ StoreExpressionSet;
for (const auto &KV : ExpressionToClass) {
if (auto *SE = dyn_cast<StoreExpression>(KV.first)) {
// Make sure a version that will conflict with loads is not already there
- auto Res =
- StoreExpressionSet.insert({SE->getOperand(0), SE->getMemoryLeader()});
- assert(Res.second &&
- "Stored expression conflict exists in expression table");
+ auto Res = StoreExpressionSet.insert(
+ {SE->getOperand(0), std::make_tuple(SE->getMemoryLeader(), KV.second,
+ SE->getStoredValue())});
+ bool Okay = Res.second;
+ // It's okay to have the same expression already in there if it is
+ // identical in nature.
+ // This can happen when the leader of the stored value changes over time.
+ if (!Okay)
+ Okay = (std::get<1>(Res.first->second) == KV.second) &&
+ (lookupOperandLeader(std::get<2>(Res.first->second)) ==
+ lookupOperandLeader(SE->getStoredValue()));
+ assert(Okay && "Stored expression conflict exists in expression table");
auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst());
assert(ValueExpr && ValueExpr->equals(*SE) &&
"StoreExpression in ExpressionToClass is not latest "
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index a20890b22603e..6da551bd7efd6 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -106,11 +107,12 @@ XorOpnd::XorOpnd(Value *V) {
I->getOpcode() == Instruction::And)) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
- if (isa<ConstantInt>(V0))
+ const APInt *C;
+ if (match(V0, PatternMatch::m_APInt(C)))
std::swap(V0, V1);
- if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
- ConstPart = C->getValue();
+ if (match(V1, PatternMatch::m_APInt(C))) {
+ ConstPart = *C;
SymbolicPart = V0;
isOr = (I->getOpcode() == Instruction::Or);
return;
@@ -119,7 +121,7 @@ XorOpnd::XorOpnd(Value *V) {
// view the operand as "V | 0"
SymbolicPart = V;
- ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+ ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
isOr = true;
}
@@ -955,8 +957,8 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
/// Scan backwards and forwards among values with the same rank as element i
/// to see if X exists. If X does not exist, return i. This is useful when
/// scanning for 'x' when we see '-x' because they both get the same rank.
-static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
- Value *X) {
+static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
+ unsigned i, Value *X) {
unsigned XRank = Ops[i].Rank;
unsigned e = Ops.size();
for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
@@ -1134,20 +1136,19 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
/// be returned.
-static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
+static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
- if (ConstOpnd != 0) {
- if (!ConstOpnd.isAllOnesValue()) {
- LLVMContext &Ctx = Opnd->getType()->getContext();
- Instruction *I;
- I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
- "and.ra", InsertBefore);
- I->setDebugLoc(InsertBefore->getDebugLoc());
- return I;
- }
+ if (ConstOpnd.isNullValue())
+ return nullptr;
+
+ if (ConstOpnd.isAllOnesValue())
return Opnd;
- }
- return nullptr;
+
+ Instruction *I = BinaryOperator::CreateAnd(
+ Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
+ InsertBefore);
+ I->setDebugLoc(InsertBefore->getDebugLoc());
+ return I;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
@@ -1163,24 +1164,24 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
- if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
- if (!Opnd1->getValue()->hasOneUse())
- return false;
+ if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
+ return false;
- const APInt &C1 = Opnd1->getConstPart();
- if (C1 != ConstOpnd)
- return false;
+ if (!Opnd1->getValue()->hasOneUse())
+ return false;
- Value *X = Opnd1->getSymbolicPart();
- Res = createAndInstr(I, X, ~C1);
- // ConstOpnd was C2, now C1 ^ C2.
- ConstOpnd ^= C1;
+ const APInt &C1 = Opnd1->getConstPart();
+ if (C1 != ConstOpnd)
+ return false;
- if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
- RedoInsts.insert(T);
- return true;
- }
- return false;
+ Value *X = Opnd1->getSymbolicPart();
+ Res = createAndInstr(I, X, ~C1);
+ // ConstOpnd was C2, now C1 ^ C2.
+ ConstOpnd ^= C1;
+
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ return true;
}
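The identity this helper exploits, ((x | c1) ^ c2) rewritten as ((x & ~c1) ^ (c1 ^ c2)), is easy to spot-check with concrete bit patterns. A minimal standalone check (the values are illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x  = 0xB6;
  const uint32_t c1 = 0x0F;
  const uint32_t c2 = 0x3C;
  // (x | c1) ^ c1 zeroes every bit set in c1 and keeps the rest of x:
  assert(((x | c1) ^ c1) == (x & ~c1));
  // The general rewrite used by CombineXorOpnd; it is profitable when
  // c1 == c2, since then the (c1 ^ c2) term vanishes:
  assert(((x | c1) ^ c2) == ((x & ~c1) ^ (c1 ^ c2)));
  return 0;
}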
@@ -1221,8 +1222,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3((~C1) ^ C2);
// Do not increase code size!
- if (C3 != 0 && !C3.isAllOnesValue()) {
- int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
@@ -1238,8 +1239,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3 = C1 ^ C2;
// Do not increase code size
- if (C3 != 0 && !C3.isAllOnesValue()) {
- int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
@@ -1279,17 +1280,20 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
Type *Ty = Ops[0].Op->getType();
- APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
+ APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
// Step 1: Convert ValueEntry to XorOpnd
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *V = Ops[i].Op;
- if (!isa<ConstantInt>(V)) {
+ const APInt *C;
+ // TODO: Support non-splat vectors.
+ if (match(V, PatternMatch::m_APInt(C))) {
+ ConstOpnd ^= *C;
+ } else {
XorOpnd O(V);
O.setSymbolicRank(getRank(O.getSymbolicPart()));
Opnds.push_back(O);
- } else
- ConstOpnd ^= cast<ConstantInt>(V)->getValue();
+ }
}
// NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
@@ -1327,7 +1331,8 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
- if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ if (!ConstOpnd.isNullValue() &&
+ CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
@@ -1369,17 +1374,17 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
ValueEntry VE(getRank(O.getValue()), O.getValue());
Ops.push_back(VE);
}
- if (ConstOpnd != 0) {
- Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+ if (!ConstOpnd.isNullValue()) {
+ Value *C = ConstantInt::get(Ty, ConstOpnd);
ValueEntry VE(getRank(C), C);
Ops.push_back(VE);
}
- int Sz = Ops.size();
+ unsigned Sz = Ops.size();
if (Sz == 1)
return Ops.back().Op;
- else if (Sz == 0) {
- assert(ConstOpnd == 0);
- return ConstantInt::get(Ty->getContext(), ConstOpnd);
+ if (Sz == 0) {
+ assert(ConstOpnd.isNullValue());
+ return ConstantInt::get(Ty, ConstOpnd);
}
}
@@ -1627,8 +1632,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
-bool ReassociatePass::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
- SmallVectorImpl<Factor> &Factors) {
+static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
+ SmallVectorImpl<Factor> &Factors) {
// FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
// Compute the sum of powers of simplifiable factors.
unsigned FactorPowerSum = 0;
@@ -1999,11 +2004,6 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
if (I->isCommutative())
canonicalizeOperands(I);
- // TODO: We should optimize vector Xor instructions, but they are
- // currently unsupported.
- if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor)
- return;
-
// Don't optimize floating point instructions that don't have unsafe algebra.
if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
return;
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index c6929c33b3e9e..7a6fa1711411d 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -536,9 +536,10 @@ private:
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
void visitInstruction(Instruction &I) {
- // If a new instruction is added to LLVM that we don't handle.
+ // All the instructions we don't do any special handling for just
+ // go to overdefined.
DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
- markOverdefined(&I); // Just in case
+ markOverdefined(&I);
}
};
@@ -1814,15 +1815,11 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
if (F.isDeclaration())
continue;
- if (Solver.isBlockExecutable(&F.front())) {
+ if (Solver.isBlockExecutable(&F.front()))
for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;
- ++AI) {
- if (AI->use_empty())
- continue;
- if (tryToReplaceWithConstant(Solver, &*AI))
+ ++AI)
+ if (!AI->use_empty() && tryToReplaceWithConstant(Solver, &*AI))
++IPNumArgsElimed;
- }
- }
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!Solver.isBlockExecutable(&*BB)) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1527f15f18a33..80fbbeb6829bb 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1252,7 +1252,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
if (!LI || !LI->isSimple())
return false;
- // Both operands to the select need to be dereferencable, either
+ // Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
@@ -1637,8 +1637,17 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
cast<PointerType>(OldTy)->getPointerAddressSpace();
}
- if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
- return true;
+
+ // We can convert integers to integral pointers, but not to non-integral
+ // pointers.
+ if (OldTy->isIntegerTy())
+ return !DL.isNonIntegralPointerType(NewTy);
+
+ // We can convert integral pointers to integers, but non-integral pointers
+ // need to remain pointers.
+ if (!DL.isNonIntegralPointerType(OldTy))
+ return NewTy->isIntegerTy();
+
return false;
}
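The rules added here form a small decision table: integers may become integers or integral pointers, integral pointers may become integers, and non-integral pointers must remain pointers. A hedged standalone sketch of that table (an enum stands in for the DataLayout queries, and the earlier cases in canConvertValue, such as identical types and pointer-to-pointer casts, are assumed to have been handled already):

#include <cassert>

enum class Kind { Integer, IntegralPtr, NonIntegralPtr };

// Decision-table sketch of the late-stage rules above; in SROA these are
// DataLayout::isNonIntegralPointerType checks, not an enum.
bool canConvert(Kind Old, Kind New) {
  if (Old == Kind::Integer)       // int -> int, or int -> integral pointer
    return New != Kind::NonIntegralPtr;
  if (Old == Kind::IntegralPtr)   // integral pointer -> integer only
    return New == Kind::Integer;
  return false;                   // non-integral pointers stay pointers
}

int main() {
  assert(canConvert(Kind::Integer, Kind::IntegralPtr));
  assert(!canConvert(Kind::Integer, Kind::NonIntegralPtr));
  assert(canConvert(Kind::IntegralPtr, Kind::Integer));
  assert(!canConvert(Kind::NonIntegralPtr, Kind::Integer));
  return 0;
}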
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 850a01114eeba..ce6f93eb0c15f 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -91,7 +91,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSeparateConstOffsetFromGEPPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReducePass(Registry);
- initializeLoadCombinePass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
initializeFloat2IntLegacyPassPass(Registry);
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 3e5993618c4c0..9397b87cdf563 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -321,7 +321,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
///
-static bool canMoveAboveCall(Instruction *I, CallInst *CI) {
+static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects()) // This also handles volatile loads.
@@ -332,10 +332,10 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI) {
if (CI->mayHaveSideEffects()) {
// Non-volatile loads may be moved above a call with side effects if it
// does not write to memory and the load provably won't trap.
- // FIXME: Writes to memory only matter if they may alias the pointer
+ // Writes to memory only matter if they may alias the pointer
// being loaded from.
const DataLayout &DL = L->getModule()->getDataLayout();
- if (CI->mayWriteToMemory() ||
+ if ((AA->getModRefInfo(CI, MemoryLocation::get(L)) & MRI_Mod) ||
!isSafeToLoadUnconditionally(L->getPointerOperand(),
L->getAlignment(), DL, L))
return false;
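The switch from CI->mayWriteToMemory() to a mod/ref query makes the test location-specific: the call only blocks hoisting the load if alias analysis says it may modify the loaded location in particular. A standalone sketch of the shape of the check (a toy enum standing in for LLVM's ModRefInfo bits):

#include <cassert>

// Toy mod/ref lattice; in LLVM this is the result of
// AliasAnalysis::getModRefInfo(Call, MemoryLocation).
enum ModRef { NoModRef = 0, Ref = 1, Mod = 2, ModAndRef = 3 };

// A call blocks hoisting a load above it only when the Mod bit is set for
// the specific location loaded from, not merely because the call may write
// to some memory somewhere.
bool blocksLoadHoist(ModRef MRI) { return (MRI & Mod) != 0; }

int main() {
  assert(!blocksLoadHoist(Ref));      // call only reads the location: OK
  assert(blocksLoadHoist(ModAndRef)); // call may clobber it: keep load below
  return 0;
}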
@@ -492,10 +492,11 @@ static CallInst *findTRECandidate(Instruction *TI,
return CI;
}
-static bool
-eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs) {
+static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ AliasAnalysis *AA) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -515,7 +516,8 @@ eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
// Check that this is the case now.
BasicBlock::iterator BBI(CI);
for (++BBI; &*BBI != Ret; ++BBI) {
- if (canMoveAboveCall(&*BBI, CI)) continue;
+ if (canMoveAboveCall(&*BBI, CI, AA))
+ continue;
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
@@ -674,12 +676,17 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret,
bool &TailCallsAreMarkedTail,
SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
bool Change = false;
+ // Make sure this block is a trivial return block.
+ assert(BB->getFirstNonPHIOrDbg() == Ret &&
+ "Trying to fold non-trivial return block");
+
// If the return block contains nothing but the return and PHI's,
// there might be an opportunity to duplicate the return in its
- // predecessors and perform TRC there. Look for predecessors that end
+ // predecessors and perform TRE there. Look for predecessors that end
// in unconditional branch and recursive call(s).
SmallVector<BranchInst*, 8> UncondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -706,7 +713,7 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret,
BB->eraseFromParent();
eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs);
+ ArgumentPHIs, AA);
++NumRetDuped;
Change = true;
}
@@ -719,16 +726,18 @@ static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
if (!CI)
return false;
return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs);
+ ArgumentPHIs, AA);
}
-static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) {
+static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
@@ -763,11 +772,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change =
processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall, TTI);
+ ArgumentPHIs, !CanTRETailMarkedCall, TTI, AA);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change =
- foldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall, TTI);
+ Change = foldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall, TTI, AA);
MadeChange |= Change;
}
}
@@ -797,6 +806,7 @@ struct TailCallElim : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -805,7 +815,8 @@ struct TailCallElim : public FunctionPass {
return false;
return eliminateTailRecursion(
- F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F));
+ F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
+ &getAnalysis<AAResultsWrapperPass>().getAAResults());
}
};
}
@@ -826,8 +837,9 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
FunctionAnalysisManager &AM) {
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+ AliasAnalysis &AA = AM.getResult<AAManager>(F);
- bool Changed = eliminateTailRecursion(F, &TTI);
+ bool Changed = eliminateTailRecursion(F, &TTI, &AA);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index ebde1f9a17dd6..b60dfb4f3541d 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -116,6 +116,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_wcslen:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_strchr:
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 2af671636cbdb..5127eba3f9aea 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -1081,7 +1082,7 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
}
/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar,
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
DIExpression *DIExpr,
PHINode *APN) {
// Since we can't guarantee that the original dbg.declare intrinsic
@@ -1159,7 +1160,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
DbgValue->insertAfter(LI);
}
-/// Inserts a llvm.dbg.value intrinsic after a phi
+/// Inserts a llvm.dbg.value intrinsic after a phi
/// that has an associated llvm.dbg.declare intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
PHINode *APN, DIBuilder &Builder) {
@@ -1742,12 +1743,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
// Preserve !invariant.group in K.
break;
case LLVMContext::MD_align:
- K->setMetadata(Kind,
+ K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
case LLVMContext::MD_dereferenceable:
case LLVMContext::MD_dereferenceable_or_null:
- K->setMetadata(Kind,
+ K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
}
@@ -1847,6 +1848,49 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
return false;
}
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+ LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
+ NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+ return;
+ }
+
+ // The only other translation we can do is to integral loads with !range
+ // metadata.
+ if (!NewTy->isIntegerTy())
+ return;
+
+ MDBuilder MDB(NewLI.getContext());
+ const Value *Ptr = OldLI.getPointerOperand();
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLI.setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+}
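The [NonNullInt, NullInt) range built here, i.e. [1, 0), wraps around: with null mapping to 0, it covers every integer except 0, which is exactly "nonnull" expressed as !range metadata. A standalone check of the wraparound membership rule, simplified to 8 bits (real ConstantRange semantics for equal bounds differ; this sketch assumes Lo != Hi):

#include <cassert>
#include <cstdint>

// Membership in a half-open range [Lo, Hi) over uint8_t, allowing the
// wrapped case Lo > Hi, the same convention !range metadata uses.
bool inRange(uint8_t Lo, uint8_t Hi, uint8_t V) {
  return Lo <= Hi ? (Lo <= V && V < Hi)   // ordinary range
                  : (V >= Lo || V < Hi);  // wrapped range
}

int main() {
  // The [1, 0) range emitted above: everything except 0 (i.e. non-null).
  assert(!inRange(1, 0, 0));
  assert(inRange(1, 0, 1));
  assert(inRange(1, 0, 255));
  return 0;
}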
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+ MDNode *N, LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+
+ // Give up unless the load is converted to a pointer type, for which there
+ // is a single, very valuable mapping we can do reliably.
+ // FIXME: It would be nice to propagate this in more ways, but the type
+ // conversions make it hard.
+ if (!NewTy->isPointerTy())
+ return;
+
+ unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+ if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+ MDNode *NN = MDNode::get(OldLI.getContext(), None);
+ NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+ }
+}
+
namespace {
/// A potential constituent of a bitreverse or bswap expression. See
/// collectBitParts for a fuller explanation.
@@ -1968,7 +2012,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
unsigned NumMaskedBits = AndMask.countPopulation();
if (!MatchBitReversals && NumMaskedBits % 8 != 0)
return Result;
-
+
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
MatchBitReversals, BPS);
if (!Res)
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f3db278ef1e49..e21e34df8ded0 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -72,7 +72,6 @@ using namespace llvm;
#define DEBUG_TYPE "loop-simplify"
-STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
// If the block isn't already, move the new block to right after some 'outside
@@ -152,37 +151,6 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
return PreheaderBB;
}
-/// \brief Ensure that the loop preheader dominates all exit blocks.
-///
-/// This method is used to split exit blocks that have predecessors outside of
-/// the loop.
-static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
- DominatorTree *DT, LoopInfo *LI,
- bool PreserveLCSSA) {
- SmallVector<BasicBlock*, 8> LoopBlocks;
- for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
- BasicBlock *P = *I;
- if (L->contains(P)) {
- // Don't do this if the loop is exited via an indirect branch.
- if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
-
- LoopBlocks.push_back(P);
- }
- }
-
- assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
- BasicBlock *NewExitBB = nullptr;
-
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI,
- PreserveLCSSA);
- if (!NewExitBB)
- return nullptr;
-
- DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
- << NewExitBB->getName() << "\n");
- return NewExitBB;
-}
-
/// Add the specified block, and all of its predecessors, to the specified set,
/// if it's not already in there. Stop predecessor traversal when we reach
/// StopBlock.
@@ -346,16 +314,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
// Split edges to exit blocks from the inner loop, if they emerged in the
// process of separating the outer one.
- SmallVector<BasicBlock *, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
- ExitBlocks.end());
- for (BasicBlock *ExitBlock : ExitBlockSet) {
- if (any_of(predecessors(ExitBlock),
- [L](BasicBlock *BB) { return !L->contains(BB); })) {
- rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
- }
- }
+ formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
if (PreserveLCSSA) {
// Fix LCSSA form for L. Some values, which previously were only used inside
@@ -563,29 +522,16 @@ ReprocessLoop:
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
- if (Preheader) {
- ++NumInserted;
+ if (Preheader)
Changed = true;
- }
}
// Next, check to make sure that all exit nodes of the loop only have
// predecessors that are inside of the loop. This check guarantees that the
// loop preheader/header will dominate the exit blocks. If the exit block has
// predecessors from outside of the loop, split the edge now.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
-
- SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
- ExitBlocks.end());
- for (BasicBlock *ExitBlock : ExitBlockSet) {
- if (any_of(predecessors(ExitBlock),
- [L](BasicBlock *BB) { return !L->contains(BB); })) {
- rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
- ++NumInserted;
- Changed = true;
- }
- }
+ if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA))
+ Changed = true;
// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
@@ -614,10 +560,8 @@ ReprocessLoop:
// insert a new block that all backedges target, then make it jump to the
// loop header.
LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
- if (LoopLatch) {
- ++NumInserted;
+ if (LoopLatch)
Changed = true;
- }
}
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
@@ -645,7 +589,22 @@ ReprocessLoop:
// loop-invariant instructions out of the way to open up more
// opportunities, and the disadvantage of having the responsibility
// to preserve dominator information.
- if (ExitBlockSet.size() == 1) {
+ auto HasUniqueExitBlock = [&]() {
+ BasicBlock *UniqueExit = nullptr;
+ for (auto *ExitingBB : ExitingBlocks)
+ for (auto *SuccBB : successors(ExitingBB)) {
+ if (L->contains(SuccBB))
+ continue;
+
+ if (!UniqueExit)
+ UniqueExit = SuccBB;
+ else if (UniqueExit != SuccBB)
+ return false;
+ }
+
+ return true;
+ };
+ if (HasUniqueExitBlock()) {
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
if (!ExitingBlock->getSinglePredecessor()) continue;
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index a920cd86a26a8..5f85e17927fa2 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -472,10 +472,22 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// exit block only.
if (!L->isLoopSimplifyForm())
return false;
- BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
- if (!Exit)
- return false;
+ // Guaranteed by LoopSimplifyForm.
+ BasicBlock *Latch = L->getLoopLatch();
+
+ BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop
+ if (!LatchExit)
+ return false;
+ // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+ // targets of the Latch be the single exit block out of the loop. This needs
+ // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ assert((LatchBR->getSuccessor(0) == LatchExit ||
+ LatchBR->getSuccessor(1) == LatchExit) &&
+ "one of the loop latch successors should be "
+ "the exit block!");
+ (void)LatchBR;
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
if (!SE)
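To make the asserted invariant concrete, here is an illustrative latch shape (example IR in comment form, not taken from this patch) that CloneLoopBlocks can handle:

  // latch:
  //   %cond = icmp ult i64 %iv.next, %n
  //   br i1 %cond, label %header, label %latchexit
  // Exactly one latch successor leaves the loop, and that successor,
  // %latchexit, is the loop's unique exit block.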
@@ -510,25 +522,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (Log2_32(Count) > BEWidth)
return false;
- BasicBlock *Latch = L->getLoopLatch();
-
- // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be the single exit block out of the loop. This needs
- // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- assert(
- (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) &&
- "one of the loop latch successors should be "
- "the exit block!");
- // Avoid warning of unused `LatchBR` variable in release builds.
- (void)LatchBR;
// Loop structure is the following:
//
// PreHeader
// Header
// ...
// Latch
- // Exit
+ // LatchExit
BasicBlock *NewPreHeader;
BasicBlock *NewExit = nullptr;
@@ -541,9 +541,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Split PreHeader to insert a branch around loop for unrolling.
NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
NewPreHeader->setName(PreHeader->getName() + ".new");
- // Split Exit to create phi nodes from branch above.
- SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
+ // Split LatchExit to create phi nodes from branch above.
+ SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
+ NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
DT, LI, PreserveLCSSA);
// Split NewExit to insert epilog remainder loop.
EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
@@ -570,7 +570,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Latch Header
// *NewExit ...
// *EpilogPreHeader Latch
- // Exit Exit
+ // LatchExit LatchExit
// Calculate conditions for branch around loop for unrolling
// in epilog case and around prolog remainder loop in prolog case.
@@ -648,7 +648,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
- BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
+ BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
@@ -672,7 +672,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// EpilogHeader Header
// ... ...
// EpilogLatch Latch
- // Exit Exit
+ // LatchExit LatchExit
// Rewrite the cloned instruction operands to use the values created when the
// clone is created.
@@ -686,7 +686,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (UseEpilogRemainder) {
// Connect the epilog code to the original loop and update the
// PHI functions.
- ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
+ ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
EpilogPreHeader, NewPreHeader, VMap, DT, LI,
PreserveLCSSA);
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 412f6129407ed..0ed33945ef407 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -29,6 +30,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -87,8 +89,7 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
// Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
// with a new integer type of the corresponding bit width.
- if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)),
- m_And(m_APInt(M), m_Instruction(I))))) {
+ if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
int32_t Bits = (*M + 1).exactLogBase2();
if (Bits > 0) {
RT = IntegerType::get(Phi->getContext(), Bits);
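m_c_And is the commutative matcher, so the two hand-spelled operand orders collapse into a single pattern. A minimal standalone sketch (assuming llvm/IR/PatternMatch.h; `J`, `Op`, and `Mask` are illustrative names):

  using namespace llvm::PatternMatch;
  const APInt *Mask;
  Instruction *Op;
  // Matches both `and i32 %x, 7` and `and i32 7, %x` in one pattern.
  bool IsMaskedValue = match(J, m_c_And(m_Instruction(Op), m_APInt(Mask)));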
@@ -923,6 +924,69 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
return true;
}
+bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
+ bool Changed = false;
+
+  // We reuse a vector for the in-loop predecessors.
+ SmallVector<BasicBlock *, 4> InLoopPredecessors;
+
+ auto RewriteExit = [&](BasicBlock *BB) {
+ assert(InLoopPredecessors.empty() &&
+ "Must start with an empty predecessors list!");
+ auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
+
+ // See if there are any non-loop predecessors of this exit block and
+ // keep track of the in-loop predecessors.
+ bool IsDedicatedExit = true;
+ for (auto *PredBB : predecessors(BB))
+ if (L->contains(PredBB)) {
+ if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ // We cannot rewrite exiting edges from an indirectbr.
+ return false;
+
+ InLoopPredecessors.push_back(PredBB);
+ } else {
+ IsDedicatedExit = false;
+ }
+
+ assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
+
+ // Nothing to do if this is already a dedicated exit.
+ if (IsDedicatedExit)
+ return false;
+
+ auto *NewExitBB = SplitBlockPredecessors(
+ BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
+
+ if (!NewExitBB)
+ DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
+ << *L << "\n");
+ else
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewExitBB->getName() << "\n");
+ return true;
+ };
+
+ // Walk the exit blocks directly rather than building up a data structure for
+ // them, but only visit each one once.
+ SmallPtrSet<BasicBlock *, 4> Visited;
+ for (auto *BB : L->blocks())
+ for (auto *SuccBB : successors(BB)) {
+ // We're looking for exit blocks so skip in-loop successors.
+ if (L->contains(SuccBB))
+ continue;
+
+ // Visit each exit block exactly once.
+ if (!Visited.insert(SuccBB).second)
+ continue;
+
+ Changed |= RewriteExit(SuccBB);
+ }
+
+ return Changed;
+}
+
/// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
SmallVector<Instruction *, 8> UsedOutside;
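To picture what formDedicatedExitBlocks establishes, here is a small before/after CFG sketch (block names are hypothetical):

  // Before: %exit has a predecessor outside L, so it is not a dedicated exit.
  //   %latch (in L) ---> %exit <--- %outside.bb
  // After: the in-loop edge is routed through a fresh block, so every exit of
  // L has only in-loop predecessors.
  //   %latch (in L) ---> %exit.loopexit ---> %exit <--- %outside.bb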
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1abdb24848506..eac2867233bc0 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5702,14 +5702,14 @@ bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I,
void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// We should not collect Uniforms more than once per VF. Right now,
- // this function is called from collectUniformsAndScalars(), which
+ // this function is called from collectUniformsAndScalars(), which
// already does this check. Collecting Uniforms for VF=1 does not make any
// sense.
assert(VF >= 2 && !Uniforms.count(VF) &&
"This function should not be visited twice for the same VF");
- // Visit the list of Uniforms. If we'll not find any uniform value, we'll
+  // Visit the list of Uniforms. If we don't find any uniform value, we'll
  // not analyze it again; Uniforms.count(VF) will return 1.
Uniforms[VF].clear();
@@ -5988,10 +5988,10 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
continue;
Value *Ptr = getPointerOperand(&I);
- // We don't check wrapping here because we don't know yet if Ptr will be
- // part of a full group or a group with gaps. Checking wrapping for all
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
// pointers (even those that end up in groups with no gaps) will be overly
- // conservative. For full groups, wrapping should be ok since if we would
+ // conservative. For full groups, wrapping should be ok since if we would
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
@@ -6244,7 +6244,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
if (LastMember) {
Value *LastMemberPtr = getPointerOperand(LastMember);
- if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
/*ShouldCheckWrap=*/true)) {
DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
"last group member potentially pointer-wrapping.\n");
@@ -6252,9 +6252,9 @@ void InterleavedAccessInfo::analyzeInterleaving(
}
} else {
// Case 3: A non-reversed interleaved load group with gaps: We need
- // to execute at least one scalar epilogue iteration. This will ensure
+ // to execute at least one scalar epilogue iteration. This will ensure
// we don't speculatively access memory out-of-bounds. We only need
- // to look for a member at index factor - 1, since every group must have
+ // to look for a member at index factor - 1, since every group must have
// a member at index zero.
if (Group->isReverse()) {
releaseGroup(Group);
@@ -7789,8 +7789,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the loop for a trip count threshold:
// do not vectorize loops with a tiny trip count.
- const unsigned MaxTC = SE->getSmallConstantMaxTripCount(L);
- if (MaxTC > 0u && MaxTC < TinyTripCountVectorThreshold) {
+ unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
+ bool HasExpectedTC = (ExpectedTC > 0);
+
+ if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
+ auto EstimatedTC = getLoopEstimatedTripCount(L);
+ if (EstimatedTC) {
+ ExpectedTC = *EstimatedTC;
+ HasExpectedTC = true;
+ }
+ }
+
+ if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
<< "This loop is not worth vectorizing.");
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
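When SCEV cannot bound the trip count, getLoopEstimatedTripCount (declared in LoopUtils.h) falls back to profile data. As a rough approximation of its behavior, not a quote of it: it reads the latch's branch_weights and estimates trips per loop entry.

  // latch: br i1 %c, label %header, label %exit,
  //        !prof !{!"branch_weights", i32 127, i32 1}
  // EstimatedTC ~= (BackedgeWeight + ExitWeight) / ExitWeight = 128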
@@ -7822,18 +7832,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool OptForSize =
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
- // Compute the weighted frequency of this loop being executed and see if it
- // is less than 20% of the function entry baseline frequency. Note that we
- // always have a canonical loop here because we think we *can* vectorize.
- // FIXME: This is hidden behind a flag due to pervasive problems with
- // exactly what block frequency models.
- if (LoopVectorizeWithBlockFrequency) {
- BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
- if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- LoopEntryFreq < ColdEntryFreq)
- OptForSize = true;
- }
-
// Check the function attributes to see if implicit floats are allowed.
// FIXME: This check doesn't seem possibly correct -- what if the loop is
// an integer loop and the vector instructions selected are purely integer
@@ -8015,11 +8013,6 @@ bool LoopVectorizePass::runImpl(
DB = &DB_;
ORE = &ORE_;
- // Compute some weights outside of the loop over the loops. Compute this
- // using a BranchProbability to re-use its scaling math.
- const BranchProbability ColdProb(1, 5); // 20%
- ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
-
// Don't attempt if
// 1. the target claims to have no vector registers, and
// 2. interleaving won't help ILP.
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d1349535f2982..b267230d31859 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3327,12 +3327,10 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
- if (!DestBundle->IsScheduled) {
+ if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
- }
- if (!DestBundle->hasValidDependencies()) {
+ if (!DestBundle->hasValidDependencies())
WorkList.push_back(DestBundle);
- }
}
} else {
// I'm not sure if this can ever happen. But we need to be safe.