author      Dimitry Andric <dim@FreeBSD.org>  2015-08-07 23:01:33 +0000
committer   Dimitry Andric <dim@FreeBSD.org>  2015-08-07 23:01:33 +0000
commit      ee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch)
tree        52d1861acda1205241ee35a94aa63129c604d469 /lib
parent      1a82d4c088707c791c792f6822f611b47a12bdfe (diff)
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/AliasAnalysis.cpp5
-rw-r--r--lib/Analysis/AliasDebugger.cpp4
-rw-r--r--lib/Analysis/AliasSetTracker.cpp3
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp6
-rw-r--r--lib/Analysis/ConstantFolding.cpp6
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp320
-rw-r--r--lib/Analysis/IPA/InlineCost.cpp6
-rw-r--r--lib/Analysis/IVUsers.cpp17
-rw-r--r--lib/Analysis/InstructionSimplify.cpp114
-rw-r--r--lib/Analysis/LoopAccessAnalysis.cpp447
-rw-r--r--lib/Analysis/NoAliasAnalysis.cpp1
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp6
-rw-r--r--lib/Analysis/ValueTracking.cpp2
-rw-r--r--lib/Analysis/VectorUtils.cpp198
-rw-r--r--lib/AsmParser/LLLexer.cpp1
-rw-r--r--lib/AsmParser/LLParser.cpp98
-rw-r--r--lib/AsmParser/LLToken.h1
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp75
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp8
-rw-r--r--lib/CodeGen/Analysis.cpp35
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp38
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp5
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp28
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h225
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h74
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp30
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h8
-rw-r--r--lib/CodeGen/AsmPrinter/WinException.cpp32
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp3
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp146
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp2
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp12
-rw-r--r--lib/CodeGen/GlobalMerge.cpp29
-rw-r--r--lib/CodeGen/ImplicitNullChecks.cpp93
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp18
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp12
-rw-r--r--lib/CodeGen/MIRParser/MILexer.cpp30
-rw-r--r--lib/CodeGen/MIRParser/MILexer.h21
-rw-r--r--lib/CodeGen/MIRParser/MIParser.cpp269
-rw-r--r--lib/CodeGen/MIRParser/MIParser.h16
-rw-r--r--lib/CodeGen/MIRParser/MIRParser.cpp164
-rw-r--r--lib/CodeGen/MIRPrinter.cpp165
-rw-r--r--lib/CodeGen/MachineDominators.cpp4
-rw-r--r--lib/CodeGen/MachineFunction.cpp40
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp3
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp48
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp47
-rw-r--r--lib/CodeGen/Passes.cpp12
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp57
-rw-r--r--lib/CodeGen/RegAllocFast.cpp14
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp42
-rw-r--r--lib/CodeGen/RegisterPressure.cpp12
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp145
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp36
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp25
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp263
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp123
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h6
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp43
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp37
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp198
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp10
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp193
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp555
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h6
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp50
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp178
-rw-r--r--lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp4
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp14
-rw-r--r--lib/CodeGen/StackMapLivenessAnalysis.cpp45
-rw-r--r--lib/CodeGen/StackMaps.cpp165
-rw-r--r--lib/CodeGen/StackProtector.cpp2
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp30
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp49
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp8
-rw-r--r--lib/CodeGen/VirtRegMap.cpp73
-rw-r--r--lib/CodeGen/WinEHPrepare.cpp75
-rw-r--r--lib/DebugInfo/DWARF/DWARFContext.cpp8
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt4
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp108
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt2
-rw-r--r--lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp43
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp46
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp4
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp61
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp14
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h1
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h2
-rw-r--r--lib/IR/Attributes.cpp8
-rw-r--r--lib/IR/AutoUpgrade.cpp17
-rw-r--r--lib/IR/BasicBlock.cpp51
-rw-r--r--lib/IR/Core.cpp8
-rw-r--r--lib/IR/DIBuilder.cpp58
-rw-r--r--lib/IR/Dominators.cpp20
-rw-r--r--lib/IR/Value.cpp2
-rw-r--r--lib/IR/Verifier.cpp58
-rw-r--r--lib/LTO/LTOModule.cpp3
-rw-r--r--lib/LibDriver/LibDriver.cpp19
-rw-r--r--lib/MC/CMakeLists.txt1
-rw-r--r--lib/MC/MCAsmStreamer.cpp3
-rw-r--r--lib/MC/MCAssembler.cpp2
-rw-r--r--lib/MC/MCDisassembler/MCExternalSymbolizer.cpp6
-rw-r--r--lib/MC/MCInstrDesc.cpp2
-rw-r--r--lib/MC/MCSchedule.cpp34
-rw-r--r--lib/MC/MCSubtargetInfo.cpp49
-rw-r--r--lib/MC/MCSymbol.cpp3
-rw-r--r--lib/Object/Archive.cpp49
-rw-r--r--lib/Object/ArchiveWriter.cpp211
-rw-r--r--lib/Object/COFFObjectFile.cpp36
-rw-r--r--lib/Object/ELFYAML.cpp10
-rw-r--r--lib/Object/MachOObjectFile.cpp43
-rw-r--r--lib/Object/Object.cpp15
-rw-r--r--lib/Object/ObjectFile.cpp9
-rw-r--r--lib/Support/APFloat.cpp23
-rw-r--r--lib/Support/CommandLine.cpp30
-rw-r--r--lib/Support/Triple.cpp116
-rw-r--r--lib/TableGen/Record.cpp2
-rw-r--r--lib/TableGen/SetTheory.cpp2
-rw-r--r--lib/TableGen/TGParser.cpp2
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp15
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td5
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp43
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp39
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h4
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp50
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp189
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h21
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td11
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp20
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp9
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h2
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp12
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp16
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.h24
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp15
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td10
-rw-r--r--lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp11
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp34
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp129
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h13
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp1
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h5
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp9
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h17
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp13
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp31
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.h4
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp31
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp57
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h15
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp64
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h4
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td9
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td14
-rw-r--r--lib/Target/AMDGPU/SILoadStoreOptimizer.cpp44
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.cpp1
-rw-r--r--lib/Target/AMDGPU/SIPrepareScratchRegs.cpp1
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp2
-rw-r--r--lib/Target/AMDGPU/SIShrinkInstructions.cpp39
-rw-r--r--lib/Target/ARM/ARM.td4
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp20
-rw-r--r--lib/Target/ARM/ARMCallingConv.td3
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp45
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp43
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h4
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp47
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp328
-rw-r--r--lib/Target/ARM/ARMISelLowering.h20
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp1453
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h2
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp38
-rw-r--r--lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp7
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp24
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h15
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp166
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp23
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp2
-rw-r--r--lib/Target/BPF/BPFFrameLowering.cpp16
-rw-r--r--lib/Target/BPF/BPFFrameLowering.h4
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp11
-rw-r--r--lib/Target/BPF/BPFSubtarget.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp8
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp4
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h3
-rw-r--r--lib/Target/Hexagon/BitTracker.cpp1127
-rw-r--r--lib/Target/Hexagon/BitTracker.h449
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.cpp1174
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.h64
-rw-r--r--lib/Target/Hexagon/HexagonCommonGEP.cpp1325
-rw-r--r--lib/Target/Hexagon/HexagonExpandCondsets.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h2
-rw-r--r--lib/Target/Hexagon/HexagonGenExtract.cpp259
-rw-r--r--lib/Target/Hexagon/HexagonGenInsert.cpp1598
-rw-r--r--lib/Target/Hexagon/HexagonGenPredicate.cpp525
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp83
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h29
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h2
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp56
-rw-r--r--lib/Target/Hexagon/LLVMBuild.txt1
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp9
-rw-r--r--lib/Target/MSP430/CMakeLists.txt1
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp9
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp9
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp74
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h9
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp9
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.cpp23
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.h31
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp3
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h6
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp167
-rw-r--r--lib/Target/Mips/CMakeLists.txt1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp8
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp11
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h4
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp10
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp3
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp38
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp5
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp91
-rw-r--r--lib/Target/Mips/MipsISelLowering.h24
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp19
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h4
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp5
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.cpp23
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.h31
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp2
-rw-r--r--lib/Target/Mips/MipsSubtarget.h6
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp4
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp12
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp14
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp202
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h17
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp32
-rw-r--r--lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp5
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp14
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.h19
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp23
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp50
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp5
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td3
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp37
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp20
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h4
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp43
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp260
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h29
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp33
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td15
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp28
-rw-r--r--lib/Target/PowerPC/PPCScheduleP7.td4
-rw-r--r--lib/Target/PowerPC/PPCScheduleP8.td4
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.h31
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h6
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp21
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h2
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.h15
-rw-r--r--lib/Target/PowerPC/PPCVSXFMAMutate.cpp10
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp209
-rw-r--r--lib/Target/Sparc/CMakeLists.txt1
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp12
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp26
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h4
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp31
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp93
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h12
-rw-r--r--lib/Target/Sparc/SparcInstrAliases.td135
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp9
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td19
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td4
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.cpp24
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.h31
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h6
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp9
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp19
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h4
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp52
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h17
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp6
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp6
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h3
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h15
-rw-r--r--lib/Target/TargetMachine.cpp5
-rw-r--r--lib/Target/TargetSubtargetInfo.cpp9
-rw-r--r--lib/Target/WebAssembly/CMakeLists.txt1
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp3
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h3
-rw-r--r--lib/Target/WebAssembly/Makefile3
-rw-r--r--lib/Target/WebAssembly/README.txt11
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrAtomics.td7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrCall.td21
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrConv.td44
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrFloat.td44
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrFormats.td33
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.td19
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInteger.td45
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrMemory.td46
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrSIMD.td7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp56
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.h19
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.td34
-rw-r--r--lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp3
-rw-r--r--lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h1
-rw-r--r--lib/Target/WebAssembly/WebAssemblySubtarget.cpp4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h24
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp23
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp1
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp6
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp32
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp5
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp82
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h8
-rw-r--r--lib/Target/X86/X86FastISel.cpp69
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp3
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp29
-rw-r--r--lib/Target/X86/X86FrameLowering.h4
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp13
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp940
-rw-r--r--lib/Target/X86/X86ISelLowering.h47
-rw-r--r--lib/Target/X86/X86InstrAVX512.td541
-rw-r--r--lib/Target/X86/X86InstrControl.td6
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td83
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp106
-rw-r--r--lib/Target/X86/X86InstrInfo.td14
-rw-r--r--lib/Target/X86/X86InstrSSE.td65
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h157
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h83
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp21
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp12
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.h3
-rw-r--r--lib/Target/X86/X86Subtarget.cpp21
-rw-r--r--lib/Target/X86/X86Subtarget.h22
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp37
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h15
-rw-r--r--lib/Target/X86/X86WinEHState.cpp31
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp9
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp13
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h4
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp7
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp97
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h11
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp8
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.cpp22
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.h2
-rw-r--r--lib/Target/XCore/XCoreSubtarget.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp5
-rw-r--r--lib/Target/XCore/XCoreTargetTransformInfo.h17
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp2
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt1
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp26
-rw-r--r--lib/Transforms/IPO/ElimAvailExtern.cpp84
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp10
-rw-r--r--lib/Transforms/IPO/IPO.cpp1
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp16
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h8
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp17
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp67
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp29
-rw-r--r--lib/Transforms/Scalar/GVN.cpp13
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp56
-rw-r--r--lib/Transforms/Scalar/LICM.cpp13
-rw-r--r--lib/Transforms/Scalar/LoopDistribute.cpp193
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopInterchange.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp6
-rw-r--r--lib/Transforms/Scalar/MergedLoadStoreMotion.cpp8
-rw-r--r--lib/Transforms/Scalar/PlaceSafepoints.cpp4
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp3
-rw-r--r--lib/Transforms/Scalar/SROA.cpp21
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp2
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp66
-rw-r--r--lib/Transforms/Utils/Local.cpp11
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp1
-rw-r--r--lib/Transforms/Utils/LoopVersioning.cpp106
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp474
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp102
412 files changed, 16888 insertions, 6713 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index ad0727a0e0e5..44d137dffd22 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -71,11 +71,6 @@ void AliasAnalysis::deleteValue(Value *V) {
AA->deleteValue(V);
}
-void AliasAnalysis::copyValue(Value *From, Value *To) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- AA->copyValue(From, To);
-}
-
void AliasAnalysis::addEscapingUse(Use &U) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
AA->addEscapingUse(U);
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index 1ef49fc02fef..e5107b3bc827 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -124,10 +124,6 @@ namespace {
assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
AliasAnalysis::deleteValue(V);
}
- void copyValue(Value *From, Value *To) override {
- Vals.insert(To);
- AliasAnalysis::copyValue(From, To);
- }
};
}
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index bf8cda1ffaec..54d0f4304e1f 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -544,9 +544,6 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
// the tracker already knows about a value, it will ignore the request.
//
void AliasSetTracker::copyValue(Value *From, Value *To) {
- // Notify the alias analysis implementation that this value is copied.
- AA.copyValue(From, To);
-
// First, look up the PointerRec for this pointer.
PointerMapType::iterator I = PointerMap.find_as(From);
if (I == PointerMap.end())
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 8e812252fdfe..68f766edb301 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -685,6 +685,9 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
if (CS.onlyReadsMemory())
Min = OnlyReadsMemory;
+ if (CS.onlyAccessesArgMemory())
+ Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+
// The AliasAnalysis base class has some smarts, lets use them.
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
}
@@ -710,6 +713,9 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
if (F->onlyReadsMemory())
Min = OnlyReadsMemory;
+ if (F->onlyAccessesArgMemory())
+ Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (isMemsetPattern16(F, TLI))
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 2f4c6a92f9af..02a5aef03223 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1234,6 +1234,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
@@ -1450,6 +1452,10 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFoldFP(floor, V, Ty);
case Intrinsic::ceil:
return ConstantFoldFP(ceil, V, Ty);
+ case Intrinsic::sin:
+ return ConstantFoldFP(sin, V, Ty);
+ case Intrinsic::cos:
+ return ConstantFoldFP(cos, V, Ty);
}
if (!TLI)
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index f1ddde252924..18d45dd6a396 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -42,94 +42,111 @@ STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
namespace {
- /// FunctionRecord - One instance of this structure is stored for every
- /// function in the program. Later, the entries for these functions are
- /// removed if the function is found to call an external function (in which
- /// case we know nothing about it.
- struct FunctionRecord {
- /// GlobalInfo - Maintain mod/ref info for all of the globals without
- /// addresses taken that are read or written (transitively) by this
- /// function.
- std::map<const GlobalValue*, unsigned> GlobalInfo;
-
- /// MayReadAnyGlobal - May read global variables, but it is not known which.
- bool MayReadAnyGlobal;
-
- unsigned getInfoForGlobal(const GlobalValue *GV) const {
- unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
- std::map<const GlobalValue*, unsigned>::const_iterator I =
+/// FunctionRecord - One instance of this structure is stored for every
+/// function in the program. Later, the entries for these functions are
+/// removed if the function is found to call an external function (in which
+/// case we know nothing about it.
+struct FunctionRecord {
+ /// GlobalInfo - Maintain mod/ref info for all of the globals without
+ /// addresses taken that are read or written (transitively) by this
+ /// function.
+ std::map<const GlobalValue *, unsigned> GlobalInfo;
+
+ /// MayReadAnyGlobal - May read global variables, but it is not known which.
+ bool MayReadAnyGlobal;
+
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
+ unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+ std::map<const GlobalValue *, unsigned>::const_iterator I =
GlobalInfo.find(GV);
- if (I != GlobalInfo.end())
- Effect |= I->second;
- return Effect;
- }
+ if (I != GlobalInfo.end())
+ Effect |= I->second;
+ return Effect;
+ }
- /// FunctionEffect - Capture whether or not this function reads or writes to
- /// ANY memory. If not, we can do a lot of aggressive analysis on it.
- unsigned FunctionEffect;
+ /// FunctionEffect - Capture whether or not this function reads or writes to
+ /// ANY memory. If not, we can do a lot of aggressive analysis on it.
+ unsigned FunctionEffect;
- FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
- };
+ FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {}
+};
- /// GlobalsModRef - The actual analysis pass.
- class GlobalsModRef : public ModulePass, public AliasAnalysis {
- /// NonAddressTakenGlobals - The globals that do not have their addresses
- /// taken.
- std::set<const GlobalValue*> NonAddressTakenGlobals;
+/// GlobalsModRef - The actual analysis pass.
+class GlobalsModRef : public ModulePass, public AliasAnalysis {
+ /// NonAddressTakenGlobals - The globals that do not have their addresses
+ /// taken.
+ std::set<const GlobalValue *> NonAddressTakenGlobals;
- /// IndirectGlobals - The memory pointed to by this global is known to be
- /// 'owned' by the global.
- std::set<const GlobalValue*> IndirectGlobals;
+ /// IndirectGlobals - The memory pointed to by this global is known to be
+ /// 'owned' by the global.
+ std::set<const GlobalValue *> IndirectGlobals;
- /// AllocsForIndirectGlobals - If an instruction allocates memory for an
- /// indirect global, this map indicates which one.
- std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals;
- /// FunctionInfo - For each function, keep track of what globals are
- /// modified or read.
- std::map<const Function*, FunctionRecord> FunctionInfo;
+ /// FunctionInfo - For each function, keep track of what globals are
+ /// modified or read.
+ std::map<const Function *, FunctionRecord> FunctionInfo;
- public:
- static char ID;
- GlobalsModRef() : ModulePass(ID) {
- initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
- }
+public:
+ static char ID;
+ GlobalsModRef() : ModulePass(ID) {
+ initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this, &M.getDataLayout());
+ bool runOnModule(Module &M) override {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
- // Find non-addr taken globals.
- AnalyzeGlobals(M);
+ // Find non-addr taken globals.
+ AnalyzeGlobals(M);
- // Propagate on CG.
- AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
- return false;
- }
+ // Propagate on CG.
+ AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
+ return false;
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<CallGraphWrapperPass>();
- AU.setPreservesAll(); // Does not transform code
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<CallGraphWrapperPass>();
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) override {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(const Function *F) override {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
}
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(const Function *F) override {
- ModRefBehavior Min = UnknownModRefBehavior;
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
+ ModRefBehavior Min = UnknownModRefBehavior;
+ if (const Function *F = CS.getCalledFunction())
if (FunctionRecord *FR = getFunctionInfo(F)) {
if (FR->FunctionEffect == 0)
Min = DoesNotAccessMemory;
@@ -137,68 +154,50 @@ namespace {
Min = OnlyReadsMemory;
}
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
- }
-
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- ModRefBehavior Min = UnknownModRefBehavior;
-
- if (const Function* F = CS.getCalledFunction())
- if (FunctionRecord *FR = getFunctionInfo(F)) {
- if (FR->FunctionEffect == 0)
- Min = DoesNotAccessMemory;
- else if ((FR->FunctionEffect & Mod) == 0)
- Min = OnlyReadsMemory;
- }
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ }
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
- }
+ void deleteValue(Value *V) override;
+ void addEscapingUse(Use &U) override;
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ void *getAdjustedAnalysisPointer(AnalysisID PI) override {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis *)this;
+ return this;
+ }
- void deleteValue(Value *V) override;
- void copyValue(Value *From, Value *To) override;
- void addEscapingUse(Use &U) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- private:
- /// getFunctionInfo - Return the function info for the function, or null if
- /// we don't have anything useful to say about it.
- FunctionRecord *getFunctionInfo(const Function *F) {
- std::map<const Function*, FunctionRecord>::iterator I =
+private:
+ /// getFunctionInfo - Return the function info for the function, or null if
+ /// we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function *, FunctionRecord>::iterator I =
FunctionInfo.find(F);
- if (I != FunctionInfo.end())
- return &I->second;
- return nullptr;
- }
+ if (I != FunctionInfo.end())
+ return &I->second;
+ return nullptr;
+ }
- void AnalyzeGlobals(Module &M);
- void AnalyzeCallGraph(CallGraph &CG, Module &M);
- bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
- std::vector<Function*> &Writers,
- GlobalValue *OkayStoreDest = nullptr);
- bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
- };
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers,
+ std::vector<Function *> &Writers,
+ GlobalValue *OkayStoreDest = nullptr);
+ bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+};
}
char GlobalsModRef::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
- "globalsmodref-aa", "Simple mod/ref analysis for globals",
- false, true, false)
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
+ "Simple mod/ref analysis for globals", false, true,
+ false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
- "globalsmodref-aa", "Simple mod/ref analysis for globals",
- false, true, false)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
+ "Simple mod/ref analysis for globals", false, true,
+ false)
Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
@@ -207,7 +206,7 @@ Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
/// (really, their address passed to something nontrivial), record this fact,
/// and record the functions that they are used directly in.
void GlobalsModRef::AnalyzeGlobals(Module &M) {
- std::vector<Function*> Readers, Writers;
+ std::vector<Function *> Readers, Writers;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasLocalLinkage()) {
if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
@@ -215,11 +214,12 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
NonAddressTakenGlobals.insert(I);
++NumNonAddrTakenFunctions;
}
- Readers.clear(); Writers.clear();
+ Readers.clear();
+ Writers.clear();
}
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+ ++I)
if (I->hasLocalLinkage()) {
if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
// Remember that we are tracking this global, and the mod/ref fns
@@ -228,7 +228,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
for (unsigned i = 0, e = Readers.size(); i != e; ++i)
FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
- if (!I->isConstant()) // No need to keep track of writers to constants
+ if (!I->isConstant()) // No need to keep track of writers to constants
for (unsigned i = 0, e = Writers.size(); i != e; ++i)
FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
++NumNonAddrTakenGlobalVars;
@@ -238,7 +238,8 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
AnalyzeIndirectGlobalMemory(I))
++NumIndirectGlobalVars;
}
- Readers.clear(); Writers.clear();
+ Readers.clear();
+ Writers.clear();
}
}
@@ -249,10 +250,11 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
///
/// If OkayStoreDest is non-null, stores into this global are allowed.
bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
- std::vector<Function*> &Readers,
- std::vector<Function*> &Writers,
+ std::vector<Function *> &Readers,
+ std::vector<Function *> &Writers,
GlobalValue *OkayStoreDest) {
- if (!V->getType()->isPointerTy()) return true;
+ if (!V->getType()->isPointerTy())
+ return true;
for (Use &U : V->uses()) {
User *I = U.getUser();
@@ -262,7 +264,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
if (V == SI->getOperand(1)) {
Writers.push_back(SI->getParent()->getParent());
} else if (SI->getOperand(1) != OkayStoreDest) {
- return true; // Storing the pointer
+ return true; // Storing the pointer
}
} else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
if (AnalyzeUsesOfPointer(I, Readers, Writers))
@@ -282,7 +284,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
}
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
- return true; // Allow comparison against null.
+ return true; // Allow comparison against null.
} else {
return true;
}
@@ -301,7 +303,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
// Keep track of values related to the allocation of the memory, f.e. the
// value produced by the malloc call and any casts.
- std::vector<Value*> AllocRelatedValues;
+ std::vector<Value *> AllocRelatedValues;
// Walk the user list of the global. If we find anything other than a direct
// load or store, bail out.
@@ -310,13 +312,14 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
// The pointer loaded from the global can only be used in simple ways:
// we allow addressing of it and loading storing to it. We do *not* allow
// storing the loaded pointer somewhere else or passing to a function.
- std::vector<Function*> ReadersWriters;
+ std::vector<Function *> ReadersWriters;
if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
- return false; // Loaded pointer escapes.
+ return false; // Loaded pointer escapes.
// TODO: Could try some IP mod/ref of the loaded pointer.
} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Storing the global itself.
- if (SI->getOperand(0) == GV) return false;
+ if (SI->getOperand(0) == GV)
+ return false;
// If storing the null pointer, ignore it.
if (isa<ConstantPointerNull>(SI->getOperand(0)))
@@ -327,13 +330,13 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
GV->getParent()->getDataLayout());
if (!isAllocLikeFn(Ptr, TLI))
- return false; // Too hard to analyze.
+ return false; // Too hard to analyze.
// Analyze all uses of the allocation. If any of them are used in a
// non-simple way (e.g. stored to another global) bail out.
- std::vector<Function*> ReadersWriters;
+ std::vector<Function *> ReadersWriters;
if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
- return false; // Loaded pointer escapes.
+ return false; // Loaded pointer escapes.
// Remember that this allocation is related to the indirect global.
AllocRelatedValues.push_back(Ptr);
@@ -360,7 +363,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We do a bottom-up SCC traversal of the call graph. In other words, we
// visit all callees before callers (leaf-first).
- for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
const std::vector<CallGraphNode *> &SCC = *I;
assert(!SCC.empty() && "SCC with no functions?");
@@ -437,9 +440,10 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
// Scan the function bodies for explicit loads or stores.
- for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i)
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;
+ ++i)
for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
- E = inst_end(SCC[i]->getFunction());
+ E = inst_end(SCC[i]->getFunction());
II != E && FunctionEffect != ModRef; ++II)
if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) {
FunctionEffect |= Ref;
@@ -474,8 +478,6 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
}
-
-
/// alias - If one of the pointers is to a global that we are tracking, and the
/// other is some random pointer, we know there cannot be an alias, because the
/// address of the global isn't taken.
@@ -492,8 +494,10 @@ AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
if (GV1 || GV2) {
// If the global's address is taken, pretend we don't know it's a pointer to
// the global.
- if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr;
- if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr;
+ if (GV1 && !NonAddressTakenGlobals.count(GV1))
+ GV1 = nullptr;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2))
+ GV2 = nullptr;
// If the two pointers are derived from two different non-addr-taken
// globals, or if one is and the other isn't, we know these can't alias.
@@ -554,7 +558,6 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
}
-
//===----------------------------------------------------------------------===//
// Methods to update the analysis as a result of the client transformation.
//
@@ -565,9 +568,10 @@ void GlobalsModRef::deleteValue(Value *V) {
// any AllocRelatedValues for it.
if (IndirectGlobals.erase(GV)) {
// Remove any entries in AllocsForIndirectGlobals for this global.
- for (std::map<const Value*, const GlobalValue*>::iterator
- I = AllocsForIndirectGlobals.begin(),
- E = AllocsForIndirectGlobals.end(); I != E; ) {
+ for (std::map<const Value *, const GlobalValue *>::iterator
+ I = AllocsForIndirectGlobals.begin(),
+ E = AllocsForIndirectGlobals.end();
+ I != E;) {
if (I->second == GV) {
AllocsForIndirectGlobals.erase(I++);
} else {
@@ -585,16 +589,12 @@ void GlobalsModRef::deleteValue(Value *V) {
AliasAnalysis::deleteValue(V);
}
-void GlobalsModRef::copyValue(Value *From, Value *To) {
- AliasAnalysis::copyValue(From, To);
-}
-
void GlobalsModRef::addEscapingUse(Use &U) {
// For the purposes of this analysis, it is conservatively correct to treat
// a newly escaping value equivalently to a deleted one. We could perhaps
// be more precise by processing the new use and attempting to update our
// saved analysis results to accommodate it.
deleteValue(U);
-
+
AliasAnalysis::addEscapingUse(U);
}
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 349b9cac2c2d..c0d2e375cb04 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
case Intrinsic::memmove:
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
- case Intrinsic::frameescape:
+ case Intrinsic::localescape:
HasFrameEscape = true;
return false;
}
@@ -1424,11 +1424,11 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
cast<CallInst>(CS.getInstruction())->canReturnTwice())
return false;
- // Disallow inlining functions that call @llvm.frameescape. Doing this
+ // Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
if (CS.getCalledFunction() &&
CS.getCalledFunction()->getIntrinsicID() ==
- llvm::Intrinsic::frameescape)
+ llvm::Intrinsic::localescape)
return false;
}
}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index b88b2496b875..926787d3be91 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -12,8 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -34,6 +36,7 @@ using namespace llvm;
char IVUsers::ID = 0;
INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
"Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
@@ -137,6 +140,11 @@ bool IVUsers::AddUsersImpl(Instruction *I,
if (Width > 64 || !DL.isLegalInteger(Width))
return false;
+ // Don't attempt to promote ephemeral values to indvars. They will be removed
+ // later anyway.
+ if (EphValues.count(I))
+ return false;
+
// Get the symbolic expression for this instruction.
const SCEV *ISE = SE->getSCEV(I);
@@ -244,6 +252,7 @@ IVUsers::IVUsers()
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
@@ -253,10 +262,16 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
L = l;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ *L->getHeader()->getParent());
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
+ // Collect ephemeral values so that AddUsersIfInteresting skips them.
+ EphValues.clear();
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 12e406bb1a2d..fa42b48b6cdb 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -3046,7 +3047,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ FastMathFlags FMF, const Query &Q,
+ unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
@@ -3065,6 +3067,14 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return ConstantInt::get(GetCompareTy(LHS), 1);
+ // UNO/ORD predicates can be trivially folded if NaNs are ignored.
+ if (FMF.noNaNs()) {
+ if (Pred == FCmpInst::FCMP_UNO)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_ORD)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ }
+
// fcmp pred x, undef and fcmp pred undef, x
// fold to true if unordered, false if ordered
if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
@@ -3151,12 +3161,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout &DL,
+ FastMathFlags FMF, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF,
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
@@ -3511,6 +3521,82 @@ Value *llvm::SimplifyInsertValueInst(
RecursionLimit);
}
+/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+ const Query &, unsigned) {
+ if (auto *CAgg = dyn_cast<Constant>(Agg))
+ return ConstantFoldExtractValueInstruction(CAgg, Idxs);
+
+ // extractvalue x, (insertvalue y, elt, n), n -> elt
+ unsigned NumIdxs = Idxs.size();
+ for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr;
+ IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())) {
+ ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices();
+ unsigned NumInsertValueIdxs = InsertValueIdxs.size();
+ unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs);
+ if (InsertValueIdxs.slice(0, NumCommonIdxs) ==
+ Idxs.slice(0, NumCommonIdxs)) {
+ if (NumIdxs == NumInsertValueIdxs)
+ return IVI->getInsertedValueOperand();
+ break;
+ }
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
+ unsigned) {
+ if (auto *CVec = dyn_cast<Constant>(Vec)) {
+ if (auto *CIdx = dyn_cast<Constant>(Idx))
+ return ConstantFoldExtractElementInstruction(CVec, CIdx);
+
+ // The index is not relevant if our vector is a splat.
+ if (auto *Splat = CVec->getSplatValue())
+ return Splat;
+
+ if (isa<UndefValue>(Vec))
+ return UndefValue::get(Vec->getType()->getVectorElementType());
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = Vec->getType()->getVectorNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return UndefValue::get(Vec->getType()->getVectorElementType());
+
+ if (Value *Elt = findScalarElement(Vec, IndexVal))
+ return Elt;
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyExtractElementInst(
+ Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) {
+ return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
@@ -3670,7 +3756,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -3900,9 +3986,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::FCmp:
- Result =
- SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0),
- I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
@@ -3920,6 +4006,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
IV->getIndices(), DL, TLI, DT, AC, I);
break;
}
+ case Instruction::ExtractValue: {
+ auto *EVI = cast<ExtractValueInst>(I);
+ Result = SimplifyExtractValueInst(EVI->getAggregateOperand(),
+ EVI->getIndices(), DL, TLI, DT, AC, I);
+ break;
+ }
+ case Instruction::ExtractElement: {
+ auto *EEI = cast<ExtractElementInst>(I);
+ Result = SimplifyExtractElementInst(
+ EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I);
+ break;
+ }
case Instruction::PHI:
Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
break;
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index b11cd7e84a6d..becbae4c5b50 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -48,6 +48,13 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+/// \brief The maximum iterations used to merge memory checks
+static cl::opt<unsigned> MemoryCheckMergeThreshold(
+ "memory-check-merge-threshold", cl::Hidden,
+ cl::desc("Maximum number of comparisons done when trying to merge "
+ "runtime memory checks. (default = 100)"),
+ cl::init(100));
+
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
@@ -112,35 +119,182 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
return SE->getSCEV(Ptr);
}
-void LoopAccessInfo::RuntimePointerCheck::insert(
- ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides) {
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+ unsigned DepSetId, unsigned ASId,
+ const ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.push_back(Ptr);
- Starts.push_back(AR->getStart());
- Ends.push_back(ScEnd);
- IsWritePtr.push_back(WritePtr);
- DependencySetId.push_back(DepSetId);
- AliasSetId.push_back(ASId);
+ Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
+ Sc);
+}
+
+bool RuntimePointerChecking::needsChecking(
+ const CheckingPtrGroup &M, const CheckingPtrGroup &N,
+ const SmallVectorImpl<int> *PtrPartition) const {
+ for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
+ for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
+ if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ return true;
+ return false;
+}
+
+/// Compare \p I and \p J and return the minimum.
+/// Return nullptr in case we couldn't find an answer.
+static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
+ ScalarEvolution *SE) {
+ const SCEV *Diff = SE->getMinusSCEV(J, I);
+ const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
+
+ if (!C)
+ return nullptr;
+ if (C->getValue()->isNegative())
+ return J;
+ return I;
+}
+
+bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
+ const SCEV *Start = RtCheck.Pointers[Index].Start;
+ const SCEV *End = RtCheck.Pointers[Index].End;
+
+ // Compare the starts and ends with the known minimum and maximum
+ // of this set. We need to know how we compare against the min/max
+ // of the set in order to be able to emit memchecks.
+ const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
+ if (!Min0)
+ return false;
+
+ const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
+ if (!Min1)
+ return false;
+
+ // Update the low bound expression if we've found a new min value.
+ if (Min0 == Start)
+ Low = Start;
+
+ // Update the high bound expression if we've found a new max value.
+ if (Min1 != End)
+ High = End;
+
+ Members.push_back(Index);
+ return true;
}
-bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+void RuntimePointerChecking::groupChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ // We build the groups from dependency candidates equivalence classes
+ // because:
+ // - We know that pointers in the same equivalence class share
+ // the same underlying object and therefore there is a chance
+ // that we can compare pointers
+ // - We wouldn't be able to merge two pointers for which we need
+ // to emit a memcheck. The classes in DepCands are already
+ // conveniently built such that no two pointers in the same
+ // class need checking against each other.
+
+ // We use the following (greedy) algorithm to construct the groups
+ // For every pointer in the equivalence class:
+ // For each existing group:
+ // - if the difference between this pointer and the min/max bounds
+ // of the group is a constant, then make the pointer part of the
+ // group and update the min/max bounds of that group as required.
+
+ CheckingGroups.clear();
+
+ // If we don't have the dependency partitions, construct a new
+ // checking pointer group for each pointer.
+ if (!UseDependencies) {
+ for (unsigned I = 0; I < Pointers.size(); ++I)
+ CheckingGroups.push_back(CheckingPtrGroup(I, *this));
+ return;
+ }
+
+ unsigned TotalComparisons = 0;
+
+ DenseMap<Value *, unsigned> PositionMap;
+ for (unsigned Index = 0; Index < Pointers.size(); ++Index)
+ PositionMap[Pointers[Index].PointerValue] = Index;
+
+ // We need to keep track of what pointers we've already seen so we
+ // don't process them twice.
+ SmallSet<unsigned, 2> Seen;
+
+  // Go through all equivalence classes, get the "pointer check groups"
+ // and add them to the overall solution. We use the order in which accesses
+ // appear in 'Pointers' to enforce determinism.
+ for (unsigned I = 0; I < Pointers.size(); ++I) {
+ // We've seen this pointer before, and therefore already processed
+ // its equivalence class.
+ if (Seen.count(I))
+ continue;
+
+ MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
+ Pointers[I].IsWritePtr);
+
+ SmallVector<CheckingPtrGroup, 2> Groups;
+ auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
+
+ // Because DepCands is constructed by visiting accesses in the order in
+ // which they appear in alias sets (which is deterministic) and the
+    // iteration order over an equivalence class's members depends only on
+ // the order in which unions and insertions are performed on the
+ // equivalence class, the iteration order is deterministic.
+ for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
+ MI != ME; ++MI) {
+ unsigned Pointer = PositionMap[MI->getPointer()];
+ bool Merged = false;
+ // Mark this pointer as seen.
+ Seen.insert(Pointer);
+
+ // Go through all the existing sets and see if we can find one
+ // which can include this pointer.
+ for (CheckingPtrGroup &Group : Groups) {
+ // Don't perform more than a certain amount of comparisons.
+ // This should limit the cost of grouping the pointers to something
+ // reasonable. If we do end up hitting this threshold, the algorithm
+ // will create separate groups for all remaining pointers.
+ if (TotalComparisons > MemoryCheckMergeThreshold)
+ break;
+
+ TotalComparisons++;
+
+ if (Group.addPointer(Pointer)) {
+ Merged = true;
+ break;
+ }
+ }
+
+ if (!Merged)
+ // We couldn't add this pointer to any existing set or the threshold
+ // for the number of comparisons has been reached. Create a new group
+ // to hold the current pointer.
+ Groups.push_back(CheckingPtrGroup(Pointer, *this));
+ }
+
+ // We've computed the grouped checks for this partition.
+ // Save the results and continue with the next one.
+ std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ }
+}
+
+bool RuntimePointerChecking::needsChecking(
unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+ const PointerInfo &PointerI = Pointers[I];
+ const PointerInfo &PointerJ = Pointers[J];
+
// No need to check if two readonly pointers intersect.
- if (!IsWritePtr[I] && !IsWritePtr[J])
+ if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
return false;
// Only need to check pointers between two different dependency sets.
- if (DependencySetId[I] == DependencySetId[J])
+ if (PointerI.DependencySetId == PointerJ.DependencySetId)
return false;
// Only need to check pointers in the same alias set.
- if (AliasSetId[I] != AliasSetId[J])
+ if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
// If PtrPartition is set omit checks between pointers of the same partition.
@@ -153,45 +307,75 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
return true;
}
-void LoopAccessInfo::RuntimePointerCheck::print(
+void RuntimePointerChecking::print(
raw_ostream &OS, unsigned Depth,
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
- if (NumPointers == 0)
- return;
OS.indent(Depth) << "Run-time memory checks:\n";
+
unsigned N = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition)) {
- OS.indent(Depth) << N++ << ":\n";
- OS.indent(Depth + 2) << *Pointers[I];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[I] << ")";
- OS << "\n";
- OS.indent(Depth + 2) << *Pointers[J];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[J] << ")";
- OS << "\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I)
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
+ OS.indent(Depth) << "Check " << N++ << ":\n";
+ OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
+ OS.indent(Depth + 2)
+ << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
+ << "\n";
+ }
+
+ OS.indent(Depth + 2) << "Against group " << J << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
+ OS.indent(Depth + 2)
+ << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
+ << "\n";
+ }
}
+
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ OS.indent(Depth + 2) << "Group " << I << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
+ << " High: " << *CheckingGroups[I].High << ")\n";
+ for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: "
+ << *Pointers[CheckingGroups[I].Members[J]].Expr
+ << "\n";
+ }
+ }
}
-unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
+unsigned RuntimePointerChecking::getNumberOfChecks(
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
+
+ unsigned NumPartitions = CheckingGroups.size();
unsigned CheckCount = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
+ for (unsigned I = 0; I < NumPartitions; ++I)
+ for (unsigned J = I + 1; J < NumPartitions; ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
CheckCount++;
return CheckCount;
}
-bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+bool RuntimePointerChecking::needsAnyChecking(
const SmallVectorImpl<int> *PtrPartition) const {
- return getNumberOfChecks(PtrPartition) != 0;
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
}
namespace {
@@ -207,7 +391,8 @@ public:
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
+ IsRTCheckAnalysisNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -226,11 +411,12 @@ public:
}
/// \brief Check whether we can check the pointers at runtime for
- /// non-intersection. Returns true when we have 0 pointers
- /// (a check on 0 pointers for non-intersection will always return true).
- bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
- bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop,
- const ValueToValueMap &Strides,
+ /// non-intersection.
+ ///
+  /// Returns true if we need no checks, or if we do and we can generate them
+ /// (i.e. the pointers have computable bounds).
+ bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
@@ -239,8 +425,11 @@ public:
processMemAccesses();
}
- bool isRTCheckNeeded() { return IsRTCheckNeeded; }
-
+ /// \brief Initial processing of memory accesses determined that we need to
+ /// perform dependency checking.
+ ///
+ /// Note that this can later be cleared if we retry memcheck analysis without
+ /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -255,7 +444,7 @@ private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
/// \brief Go over all memory access and check whether runtime pointer checks
- /// are needed /// and build sets of dependency check candidates.
+ /// are needed and build sets of dependency check candidates.
void processMemAccesses();
/// Set of all accesses.
@@ -280,7 +469,14 @@ private:
/// dependence check.
MemoryDepChecker::DepCandidates &DepCands;
- bool IsRTCheckNeeded;
+ /// \brief Initial processing of memory accesses determined that we may need
+ /// to add memchecks. Perform the analysis to determine the necessary checks.
+ ///
+  /// Note that this is different from isDependencyCheckNeeded. When we retry
+ /// memcheck analysis without dependency checking
+ /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
+ /// while this remains set if we have potentially dependent accesses.
+ bool IsRTCheckAnalysisNeeded;
};
} // end anonymous namespace
@@ -296,16 +492,16 @@ static bool hasComputableBounds(ScalarEvolution *SE,
return AR->isAffine();
}
-bool AccessAnalysis::canCheckPtrAtRT(
- LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck,
- ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
- bool ShouldCheckStride) {
+bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
+ ScalarEvolution *SE, Loop *TheLoop,
+ const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
- NeedRTCheck = false;
- if (!IsRTCheckNeeded) return true;
+ bool NeedRTCheck = false;
+ if (!IsRTCheckAnalysisNeeded) return true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
@@ -313,6 +509,9 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Accesses between different groups doesn't need to be checked.
unsigned ASId = 1;
for (auto &AS : AST) {
+ int NumReadPtrChecks = 0;
+ int NumWritePtrChecks = 0;
+
// We assign consecutive id to access from different dependence sets.
// Accesses within the same set don't need a runtime check.
unsigned RunningDepId = 1;
@@ -323,6 +522,11 @@ bool AccessAnalysis::canCheckPtrAtRT(
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
MemAccessInfo Access(Ptr, IsWrite);
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
if (hasComputableBounds(SE, StridesMap, Ptr) &&
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
@@ -341,7 +545,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
} else {
@@ -350,15 +554,21 @@ bool AccessAnalysis::canCheckPtrAtRT(
}
}
+ // If we have at least two writes or one write and a read then we need to
+  // check them. But there is no need to check if there is only one
+ // dependence set for this alias set.
+ //
+ // Note that this function computes CanDoRT and NeedRTCheck independently.
+ // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
+ // for which we couldn't find the bounds but we don't actually need to emit
+ // any checks so it does not matter.
+ if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
+ NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
+ NumWritePtrChecks >= 1));
+
++ASId;
}
- // We need a runtime check if there are any accesses that need checking.
- // However, some accesses cannot be checked (for example because we
- // can't determine their bounds). In these cases we would need a check
- // but wouldn't be able to add it.
- NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr);
-
// If the pointers that we would use for the bounds comparison have different
// address spaces, assume the values aren't directly comparable, so we can't
// use them for the runtime check. We also have to assume they could
@@ -368,14 +578,15 @@ bool AccessAnalysis::canCheckPtrAtRT(
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i + 1; j < NumPointers; ++j) {
// Only need to check pointers between two different dependency sets.
- if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ if (RtCheck.Pointers[i].DependencySetId ==
+ RtCheck.Pointers[j].DependencySetId)
continue;
// Only need to check pointers in the same alias set.
- if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
continue;
- Value *PtrI = RtCheck.Pointers[i];
- Value *PtrJ = RtCheck.Pointers[j];
+ Value *PtrI = RtCheck.Pointers[i].PointerValue;
+ Value *PtrJ = RtCheck.Pointers[j].PointerValue;
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
@@ -387,7 +598,18 @@ bool AccessAnalysis::canCheckPtrAtRT(
}
}
- return CanDoRT;
+ if (NeedRTCheck && CanDoRT)
+ RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+ << " pointer comparisons.\n");
+
+ RtCheck.Need = NeedRTCheck;
+
+ bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
+ if (!CanDoRTIfNeeded)
+ RtCheck.reset();
+ return CanDoRTIfNeeded;
}
void AccessAnalysis::processMemAccesses() {
@@ -470,7 +692,7 @@ void AccessAnalysis::processMemAccesses() {
// catch "a[i] = a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
CheckDeps.insert(Access);
- IsRTCheckNeeded = true;
+ IsRTCheckAnalysisNeeded = true;
}
if (IsWrite)
@@ -600,7 +822,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
// Check the step is constant.
const SCEV *Step = AR->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
+ // Calculate the pointer stride and check if it is constant.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
@@ -805,11 +1027,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
<< *InstMap[BIdx] << ": " << *Dist << "\n");
- // Need consecutive accesses. We don't want to vectorize
+ // Need accesses with constant stride. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
// the address space.
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
- DEBUG(dbgs() << "Non-consecutive pointer access\n");
+ DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return Dependence::Unknown;
}
@@ -859,8 +1081,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
unsigned Stride = std::abs(StrideAPtr);
if (Stride > 1 &&
- areStridedAccessesIndependent(Distance, Stride, TypeByteSize))
+ areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) {
+ DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
+ }
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -1098,8 +1322,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
+ PtrRtChecking.Pointers.clear();
+ PtrRtChecking.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
@@ -1258,28 +1482,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool NeedRTCheck;
- bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck,
- NeedRTCheck, SE,
- TheLoop, Strides);
-
- DEBUG(dbgs() << "LAA: We need to do "
- << PtrRtCheck.getNumberOfChecks(nullptr)
- << " pointer comparisons.\n");
-
- // Check that we found the bounds for the pointer.
- if (CanDoRT)
- DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
- else if (NeedRTCheck) {
+ bool CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
- DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
- "the array bounds.\n");
- PtrRtCheck.reset();
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
+ << "the array bounds.\n");
CanVecMem = false;
return;
}
- PtrRtCheck.Need = NeedRTCheck;
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
@@ -1290,23 +1503,21 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
- NeedRTCheck = true;
// Clear the dependency checks. We assume they are not needed.
Accesses.resetDepChecks(DepChecker);
- PtrRtCheck.reset();
- PtrRtCheck.Need = true;
+ PtrRtChecking.reset();
+ PtrRtChecking.Need = true;
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE,
- TheLoop, Strides, true);
+ CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
// Check that we found the bounds for the pointer.
- if (NeedRTCheck && !CanDoRT) {
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport()
<< "cannot check memory dependencies at runtime");
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
- PtrRtCheck.reset();
CanVecMem = false;
return;
}
@@ -1317,8 +1528,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (CanVecMem)
DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
- << (NeedRTCheck ? "" : " don't")
- << " need a runtime memory check.\n");
+ << (PtrRtChecking.Need ? "" : " don't")
+ << " need runtime memory checks.\n");
else {
emitAnalysis(LoopAccessReport() <<
"unsafe dependent memory operations in loop");
@@ -1357,35 +1568,38 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- if (!PtrRtCheck.Need)
+ if (!PtrRtChecking.Need)
return std::make_pair(nullptr, nullptr);
- unsigned NumPointers = PtrRtCheck.Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
+ SmallVector<TrackingVH<Value>, 2> Starts;
+ SmallVector<TrackingVH<Value>, 2> Ends;
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, DL, "induction");
Instruction *FirstInst = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck.Pointers[i];
+ for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
+ const RuntimePointerChecking::CheckingPtrGroup &CG =
+ PtrRtChecking.CheckingGroups[i];
+ Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue;
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
unsigned AS = Ptr->getType()->getPointerAddressSpace();
// Use this type for pointer arithmetic.
Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+ Value *Start = nullptr, *End = nullptr;
- Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
- Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
Starts.push_back(Start);
Ends.push_back(End);
}
@@ -1394,9 +1608,14 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i+1; j < NumPointers; ++j) {
- if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+ for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
+ for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
+ const RuntimePointerChecking::CheckingPtrGroup &CGI =
+ PtrRtChecking.CheckingGroups[i];
+ const RuntimePointerChecking::CheckingPtrGroup &CGJ =
+ PtrRtChecking.CheckingGroups[j];
+
+ if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
@@ -1447,7 +1666,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+ : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
@@ -1457,7 +1676,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
- if (PtrRtCheck.Need)
+ if (PtrRtChecking.Need)
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
else
OS.indent(Depth) << "Memory dependences are safe\n";
@@ -1476,7 +1695,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
// List the pair of accesses need run-time checks to prove independence.
- PtrRtCheck.print(OS, Depth);
+ PtrRtChecking.print(OS, Depth);
OS << "\n";
OS.indent(Depth) << "Store to invariant address was "
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 7617622b9ab6..322a9a80de4c 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -72,7 +72,6 @@ namespace {
}
void deleteValue(Value *V) override {}
- void copyValue(Value *From, Value *To) override {}
void addEscapingUse(Use &U) override {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 520d1e5ef87d..7d1c3fbef68a 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -28,12 +28,12 @@ namespace {
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
- explicit NoTTIImpl(const DataLayout *DL)
+ explicit NoTTIImpl(const DataLayout &DL)
: TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}
-TargetTransformInfo::TargetTransformInfo(const DataLayout *DL)
+TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
: TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
TargetTransformInfo::~TargetTransformInfo() {}
@@ -304,7 +304,7 @@ TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
char TargetIRAnalysis::PassID;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
- return Result(&F.getParent()->getDataLayout());
+ return Result(F.getParent()->getDataLayout());
}
// Register the basic pass.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index c45005f343d3..fa0d7798cae9 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1464,7 +1464,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// If the object is defined in the current Module, we'll be giving
// it the preferred alignment. Otherwise, we have to assume that it
// may only have the minimum ABI alignment.
- if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ if (GVar->isStrongDefinitionForLinker())
Align = DL.getPreferredAlignment(GVar);
else
Align = DL.getABITypeAlignment(ObjectType);
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 96fddd103cc5..67f68dc8391e 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -11,7 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
/// \brief Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
@@ -211,3 +217,195 @@ llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
return Intrinsic::not_intrinsic;
}
+
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
+ unsigned LastOperand = Gep->getNumOperands() - 1;
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
+ cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+ // Walk backwards and try to peel off zeros.
+ while (LastOperand > 1 &&
+ match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+ // Find the type we're currently indexing into.
+ gep_type_iterator GEPTI = gep_type_begin(Gep);
+ std::advance(GEPTI, LastOperand - 1);
+
+ // If it's a type with the same allocation size as the result of the GEP we
+ // can peel off the zero index.
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ break;
+ --LastOperand;
+ }
+
+ return LastOperand;
+}
+
+/// \brief If the argument is a GEP, then returns the operand identified by
+/// getGEPInductionOperand. However, if there is some other non-loop-invariant
+/// operand, it returns that instead.
+llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
+ Loop *Lp) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return Ptr;
+
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
+
+ // Check that all of the gep indices are uniform except for our induction
+ // operand.
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
+ return Ptr;
+ return GEP->getOperand(InductionOperand);
+}
+
+/// \brief If a value has only one user that is a CastInst, return it.
+llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
+ llvm::Value *UniqueCast = nullptr;
+ for (User *U : Ptr->users()) {
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (CI && CI->getType() == Ty) {
+ if (!UniqueCast)
+ UniqueCast = CI;
+ else
+ return nullptr;
+ }
+ }
+ return UniqueCast;
+}
+
+/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
+/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
+llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
+ Loop *Lp) {
+ const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ if (!PtrTy || PtrTy->isAggregateType())
+ return nullptr;
+
+  // Try to remove a gep instruction to make the pointer (actually the index at
+  // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
+  // the pointer, otherwise we are analyzing the index.
+ llvm::Value *OrigPtr = Ptr;
+
+ // The size of the pointer access.
+ int64_t PtrAccessSize = 1;
+
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
+ const SCEV *V = SE->getSCEV(Ptr);
+
+ if (Ptr != OrigPtr)
+ // Strip off casts.
+ while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
+ V = C->getOperand();
+
+ const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
+ if (!S)
+ return nullptr;
+
+ V = S->getStepRecurrence(*SE);
+ if (!V)
+ return nullptr;
+
+ // Strip off the size of access multiplication if we are still analyzing the
+ // pointer.
+ if (OrigPtr == Ptr) {
+ const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+ DL.getTypeAllocSize(PtrTy->getElementType());
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
+ if (M->getOperand(0)->getSCEVType() != scConstant)
+ return nullptr;
+
+ const APInt &APStepVal =
+ cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return nullptr;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+ if (PtrAccessSize != StepVal)
+ return nullptr;
+ V = M->getOperand(1);
+ }
+ }
+
+ // Strip off casts.
+ Type *StripedOffRecurrenceCast = nullptr;
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
+ StripedOffRecurrenceCast = C->getType();
+ V = C->getOperand();
+ }
+
+ // Look for the loop invariant symbolic value.
+ const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
+ if (!U)
+ return nullptr;
+
+ llvm::Value *Stride = U->getValue();
+ if (!Lp->isLoopInvariant(Stride))
+ return nullptr;
+
+ // If we have stripped off the recurrence cast we have to make sure that we
+ // return the value that is used in this loop so that we can replace it later.
+ if (StripedOffRecurrenceCast)
+ Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
+
+ return Stride;
+}
+
+/// \brief Given a vector and an element number, see if the scalar value is
+/// already around as a register, for example if it were inserted then extracted
+/// from the vector.
+llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+ VectorType *VTy = cast<VectorType>(V->getType());
+ unsigned Width = VTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(VTy->getElementType());
+
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->getAggregateElement(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return nullptr;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return findScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+ int InEl = SVI->getMaskValue(EltNo);
+ if (InEl < 0)
+ return UndefValue::get(VTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return findScalarElement(SVI->getOperand(0), InEl);
+ return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ }
+
+ // Extract a value from a vector add operation with a constant zero.
+ Value *Val = nullptr; Constant *Con = nullptr;
+ if (match(V,
+ llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
+ llvm::PatternMatch::m_Constant(Con)))) {
+ if (Con->getAggregateElement(EltNo)->isNullValue())
+ return findScalarElement(Val, EltNo);
+ }
+
+ // Otherwise, we don't know.
+ return nullptr;
+}
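Among the helpers moved into VectorUtils above, findScalarElement chases a vector lane back through insertelement chains. A toy model of that walk, ignoring the constant, shufflevector, and add-with-zero cases the real helper also handles; the Insert chain representation is made up for illustration.

#include <iostream>
#include <optional>
#include <utility>
#include <vector>

using Insert = std::pair<unsigned, double>;   // (lane, inserted scalar)

// Given inserts applied in order to an otherwise unknown vector, return the
// scalar occupying lane EltNo, if some insert wrote it.
std::optional<double> findScalarElement(const std::vector<Insert> &Chain,
                                        unsigned EltNo) {
  for (auto It = Chain.rbegin(); It != Chain.rend(); ++It)
    if (It->first == EltNo)
      return It->second;        // the newest insert into this lane wins
  return std::nullopt;          // lane still comes from the unknown base
}

int main() {
  std::vector<Insert> Chain = {{0, 1.5}, {2, 3.0}, {0, 7.0}};
  std::cout << *findScalarElement(Chain, 0) << "\n";              // 7, not 1.5
  std::cout << (findScalarElement(Chain, 1) ? "known" : "unknown") << "\n";
  return 0;
}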
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 88f359d4fd5c..5c4bab734b2b 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -593,6 +593,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(attributes);
KEYWORD(alwaysinline);
+ KEYWORD(argmemonly);
KEYWORD(builtin);
KEYWORD(byval);
KEYWORD(inalloca);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index b3c7fa087d40..1c6e7bd18d0e 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -946,35 +946,42 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
B.addStackAlignmentAttr(Alignment);
continue;
}
- case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
- case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break;
- case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
- case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break;
- case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
- case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break;
- case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
- case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
- case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
- case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
- case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break;
- case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break;
- case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break;
- case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
- case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
- case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
- case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break;
- case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break;
- case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break;
- case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
- case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break;
- case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
- case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
- case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break;
- case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break;
- case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break;
- case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break;
- case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break;
- case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
+ case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
+ case lltok::kw_argmemonly: B.addAttribute(Attribute::ArgMemOnly); break;
+ case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break;
+ case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
+ case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break;
+ case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
+ case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break;
+ case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
+ case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
+ case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
+ case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
+ case lltok::kw_noimplicitfloat:
+ B.addAttribute(Attribute::NoImplicitFloat); break;
+ case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break;
+ case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break;
+ case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
+ case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
+ case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
+ case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break;
+ case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break;
+ case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break;
+ case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
+ case lltok::kw_returns_twice:
+ B.addAttribute(Attribute::ReturnsTwice); break;
+ case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
+ case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
+ case lltok::kw_sspstrong:
+ B.addAttribute(Attribute::StackProtectStrong); break;
+ case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break;
+ case lltok::kw_sanitize_address:
+ B.addAttribute(Attribute::SanitizeAddress); break;
+ case lltok::kw_sanitize_thread:
+ B.addAttribute(Attribute::SanitizeThread); break;
+ case lltok::kw_sanitize_memory:
+ B.addAttribute(Attribute::SanitizeMemory); break;
+ case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
// Error handling.
case lltok::kw_inreg:
@@ -1258,6 +1265,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_alignstack:
case lltok::kw_alwaysinline:
+ case lltok::kw_argmemonly:
case lltok::kw_builtin:
case lltok::kw_inlinehint:
case lltok::kw_jumptable:
@@ -1334,6 +1342,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_alignstack:
case lltok::kw_alwaysinline:
+ case lltok::kw_argmemonly:
case lltok::kw_builtin:
case lltok::kw_cold:
case lltok::kw_inlinehint:
@@ -2873,8 +2882,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (ValTy->isVectorTy() != BaseType->isVectorTy())
return Error(ID.Loc, "getelementptr index type missmatch");
if (ValTy->isVectorTy()) {
- unsigned ValNumEl = cast<VectorType>(ValTy)->getNumElements();
- unsigned PtrNumEl = cast<VectorType>(BaseType)->getNumElements();
+ unsigned ValNumEl = ValTy->getVectorNumElements();
+ unsigned PtrNumEl = BaseType->getVectorNumElements();
if (ValNumEl != PtrNumEl)
return Error(
ID.Loc,
@@ -4534,8 +4543,17 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_and:
case lltok::kw_or:
case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
- case lltok::kw_icmp:
- case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal);
+ case lltok::kw_icmp: return ParseCompare(Inst, PFS, KeywordVal);
+ case lltok::kw_fcmp: {
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+ int Res = ParseCompare(Inst, PFS, KeywordVal);
+ if (Res != 0)
+ return Res;
+ if (FMF.any())
+ Inst->setFastMathFlags(FMF);
+ return 0;
+ }
+
// Casts.
case lltok::kw_trunc:
case lltok::kw_zext:
@@ -5572,6 +5590,11 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
SmallVector<Value*, 16> Indices;
bool AteExtraComma = false;
+  // GEP returns a vector of pointers if at least one of the parameters is a vector.
+ // All vector parameters should have the same vector width.
+ unsigned GEPWidth = BaseType->isVectorTy() ?
+ BaseType->getVectorNumElements() : 0;
+
while (EatIfPresent(lltok::comma)) {
if (Lex.getKind() == lltok::MetadataVar) {
AteExtraComma = true;
@@ -5580,14 +5603,13 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
if (!Val->getType()->getScalarType()->isIntegerTy())
return Error(EltLoc, "getelementptr index must be an integer");
- if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
- return Error(EltLoc, "getelementptr index type missmatch");
+
if (Val->getType()->isVectorTy()) {
- unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
- unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
- if (ValNumEl != PtrNumEl)
+ unsigned ValNumEl = Val->getType()->getVectorNumElements();
+ if (GEPWidth && GEPWidth != ValNumEl)
return Error(EltLoc,
"getelementptr vector index has a wrong number of elements");
+ GEPWidth = ValNumEl;
}
Indices.push_back(Val);
}
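The parser change above lets scalar and vector operands mix in a getelementptr as long as every vector operand has the same element count. A small sketch of that width rule, with 0 standing for a scalar operand; the function name and encoding are illustrative, not parser API.

#include <cassert>
#include <optional>
#include <vector>

// Returns the common vector width (0 for an all-scalar GEP), or std::nullopt
// when two vector operands disagree.
std::optional<unsigned>
checkGEPVectorWidth(const std::vector<unsigned> &OperandWidths) {
  unsigned GEPWidth = 0;                 // 0 = no vector operand seen yet
  for (unsigned W : OperandWidths) {
    if (W == 0)
      continue;                          // scalar operands are always fine
    if (GEPWidth && GEPWidth != W)
      return std::nullopt;               // mismatched vector widths: reject
    GEPWidth = W;
  }
  return GEPWidth;
}

int main() {
  assert(*checkGEPVectorWidth({0, 4, 0, 4}) == 4);  // vector widths agree
  assert(!checkGEPVectorWidth({4, 0, 2}));          // 4 vs 2: invalid
  assert(*checkGEPVectorWidth({0, 0}) == 0);        // all-scalar GEP
  return 0;
}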
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 2487d1208133..691f085f0c9f 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -100,6 +100,7 @@ namespace lltok {
// Attributes:
kw_attributes,
kw_alwaysinline,
+ kw_argmemonly,
kw_sanitize_address,
kw_builtin,
kw_byval,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 09f0b689bdc3..c04e8b9f1f37 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -697,6 +697,21 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
}
}
+static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
+ FastMathFlags FMF;
+ if (0 != (Val & FastMathFlags::UnsafeAlgebra))
+ FMF.setUnsafeAlgebra();
+ if (0 != (Val & FastMathFlags::NoNaNs))
+ FMF.setNoNaNs();
+ if (0 != (Val & FastMathFlags::NoInfs))
+ FMF.setNoInfs();
+ if (0 != (Val & FastMathFlags::NoSignedZeros))
+ FMF.setNoSignedZeros();
+ if (0 != (Val & FastMathFlags::AllowReciprocal))
+ FMF.setAllowReciprocal();
+ return FMF;
+}
+
static void upgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
switch (Val) {
case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break;
@@ -1075,6 +1090,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Alignment;
case bitc::ATTR_KIND_ALWAYS_INLINE:
return Attribute::AlwaysInline;
+ case bitc::ATTR_KIND_ARGMEMONLY:
+ return Attribute::ArgMemOnly;
case bitc::ATTR_KIND_BUILTIN:
return Attribute::Builtin;
case bitc::ATTR_KIND_BY_VAL:
@@ -3472,17 +3489,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
if (Record[OpNum] & (1 << bitc::PEO_EXACT))
cast<BinaryOperator>(I)->setIsExact(true);
} else if (isa<FPMathOperator>(I)) {
- FastMathFlags FMF;
- if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra))
- FMF.setUnsafeAlgebra();
- if (0 != (Record[OpNum] & FastMathFlags::NoNaNs))
- FMF.setNoNaNs();
- if (0 != (Record[OpNum] & FastMathFlags::NoInfs))
- FMF.setNoInfs();
- if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros))
- FMF.setNoSignedZeros();
- if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal))
- FMF.setAllowReciprocal();
+ FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
if (FMF.any())
I->setFastMathFlags(FMF);
}
@@ -3739,14 +3746,25 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *LHS, *RHS;
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
- OpNum+1 != Record.size())
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS))
+ return error("Invalid record");
+
+ unsigned PredVal = Record[OpNum];
+ bool IsFP = LHS->getType()->isFPOrFPVectorTy();
+ FastMathFlags FMF;
+ if (IsFP && Record.size() > OpNum+1)
+ FMF = getDecodedFastMathFlags(Record[++OpNum]);
+
+ if (OpNum+1 != Record.size())
return error("Invalid record");
if (LHS->getType()->isFPOrFPVectorTy())
- I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
+ I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS);
else
- I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
+ I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS);
+
+ if (FMF.any())
+ I->setFastMathFlags(FMF);
InstructionList.push_back(I);
break;
}
@@ -4458,14 +4476,11 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
// Upgrade any old intrinsic calls in the function.
for (auto &I : UpgradedIntrinsics) {
- if (I.first != I.second) {
- for (auto UI = I.first->user_begin(), UE = I.first->user_end();
- UI != UE;) {
- User *U = *UI;
- ++UI;
- if (CallInst *CI = dyn_cast<CallInst>(U))
- UpgradeIntrinsicCall(CI, I.second);
- }
+ for (auto UI = I.first->user_begin(), UE = I.first->user_end(); UI != UE;) {
+ User *U = *UI;
+ ++UI;
+ if (CallInst *CI = dyn_cast<CallInst>(U))
+ UpgradeIntrinsicCall(CI, I.second);
}
}
@@ -4533,15 +4548,13 @@ std::error_code BitcodeReader::materializeModule(Module *M) {
// module is materialized because there could always be another function body
// with calls to the old function.
for (auto &I : UpgradedIntrinsics) {
- if (I.first != I.second) {
- for (auto *U : I.first->users()) {
- if (CallInst *CI = dyn_cast<CallInst>(U))
- UpgradeIntrinsicCall(CI, I.second);
- }
- if (!I.first->use_empty())
- I.first->replaceAllUsesWith(I.second);
- I.first->eraseFromParent();
+ for (auto *U : I.first->users()) {
+ if (CallInst *CI = dyn_cast<CallInst>(U))
+ UpgradeIntrinsicCall(CI, I.second);
}
+ if (!I.first->use_empty())
+ I.first->replaceAllUsesWith(I.second);
+ I.first->eraseFromParent();
}
UpgradedIntrinsics.clear();
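The reader change above accepts an optional trailing fast-math-flags word on floating-point compare records. A standalone sketch of that record shape; field positions and the validation are illustrative rather than the actual bitcode encoding, which reads the first operand through a value/type pair.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct DecodedCmp {
  std::uint64_t LHS, RHS, Pred;
  std::uint64_t FMF = 0;                  // 0 means "no flags recorded"
};

bool decodeCmpRecord(const std::vector<std::uint64_t> &Record, bool IsFPCompare,
                     DecodedCmp &Out) {
  if (Record.size() < 3)
    return false;                         // malformed record
  Out.LHS = Record[0];
  Out.RHS = Record[1];
  Out.Pred = Record[2];
  std::size_t Next = 3;
  if (IsFPCompare && Record.size() > Next)
    Out.FMF = Record[Next++];             // optional trailing flags word
  return Next == Record.size();           // reject trailing garbage
}

int main() {
  DecodedCmp C;
  assert(decodeCmpRecord({4, 5, 2}, /*IsFPCompare=*/true, C) && C.FMF == 0);
  assert(decodeCmpRecord({4, 5, 2, 1}, true, C) && C.FMF == 1);
  assert(!decodeCmpRecord({4, 5, 2, 1}, false, C)); // int cmp: no flags word
  return 0;
}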
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 622f7eaf0784..1a70ba5ac127 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -162,6 +162,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_ALIGNMENT;
case Attribute::AlwaysInline:
return bitc::ATTR_KIND_ALWAYS_INLINE;
+ case Attribute::ArgMemOnly:
+ return bitc::ATTR_KIND_ARGMEMONLY;
case Attribute::Builtin:
return bitc::ATTR_KIND_BUILTIN;
case Attribute::ByVal:
@@ -1759,13 +1761,17 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
pushValue(I.getOperand(2), InstID, Vals, VE);
break;
case Instruction::ICmp:
- case Instruction::FCmp:
+ case Instruction::FCmp: {
// compare returning Int1Ty or vector of Int1Ty
Code = bitc::FUNC_CODE_INST_CMP2;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
pushValue(I.getOperand(1), InstID, Vals, VE);
Vals.push_back(cast<CmpInst>(I).getPredicate());
+ uint64_t Flags = GetOptimizationFlags(&I);
+ if (Flags != 0)
+ Vals.push_back(Flags);
break;
+ }
case Instruction::Ret:
{
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 3224fac25cb4..98d4c8afc7b9 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -81,27 +81,27 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
-void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
- SmallVectorImpl<EVT> &ValueVTs,
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<uint64_t> *Offsets,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
EE = STy->element_end();
EI != EE; ++EI)
- ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
StartingOffset + SL->getElementOffset(EI - EB));
return;
}
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
StartingOffset + i * EltSize);
return;
}
@@ -109,7 +109,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
if (Ty->isVoidTy())
return;
// Base case: we can get an EVT for this LLVM IR type.
- ValueVTs.push_back(TLI.getValueType(Ty));
+ ValueVTs.push_back(TLI.getValueType(DL, Ty));
if (Offsets)
Offsets->push_back(StartingOffset);
}
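The ComputeValueVTs change above passes the DataLayout in explicitly while keeping the same recursion over struct and array elements. A toy sketch of that flattening into leaf values plus byte offsets, with a made-up type model and no padding.

#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Ty {
  std::string Leaf;                      // non-empty for leaf types
  std::uint64_t Size;                    // leaf size in bytes
  std::vector<Ty> Elems;                 // aggregate elements (empty for leaves)
};

static std::uint64_t sizeOf(const Ty &T) {
  if (T.Elems.empty())
    return T.Size;
  std::uint64_t S = 0;
  for (const Ty &E : T.Elems)
    S += sizeOf(E);                      // toy layout: densely packed
  return S;
}

static void computeValueVTs(const Ty &T, std::uint64_t StartingOffset,
                            std::vector<std::pair<std::string, std::uint64_t>> &Out) {
  if (!T.Elems.empty()) {                // aggregate: recurse into each element
    std::uint64_t Off = StartingOffset;
    for (const Ty &E : T.Elems) {
      computeValueVTs(E, Off, Out);
      Off += sizeOf(E);
    }
    return;
  }
  Out.push_back({T.Leaf, StartingOffset});   // base case: one leaf value
}

int main() {
  Ty S{"", 0, {{"i32", 4, {}}, {"", 0, {{"double", 8, {}}, {"i8", 1, {}}}}}};
  std::vector<std::pair<std::string, std::uint64_t>> Leaves;
  computeValueVTs(S, 0, Leaves);
  for (const auto &L : Leaves)
    std::cout << L.first << " @ " << L.second << "\n"; // i32 @ 0, double @ 4, i8 @ 12
  return 0;
}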
@@ -233,7 +233,8 @@ static bool isNoopBitcast(Type *T1, Type *T2,
static const Value *getNoopInput(const Value *V,
SmallVectorImpl<unsigned> &ValLoc,
unsigned &DataBits,
- const TargetLoweringBase &TLI) {
+ const TargetLoweringBase &TLI,
+ const DataLayout &DL) {
while (true) {
// Try to look through V1; if V1 is not an instruction, it can't be looked
// through.
@@ -255,16 +256,16 @@ static const Value *getNoopInput(const Value *V,
// Make sure this isn't a truncating or extending cast. We could
// support this eventually, but don't bother for now.
if (!isa<VectorType>(I->getType()) &&
- TLI.getPointerTy().getSizeInBits() ==
- cast<IntegerType>(Op->getType())->getBitWidth())
+ DL.getPointerSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
NoopInput = Op;
} else if (isa<PtrToIntInst>(I)) {
// Look through ptrtoint.
// Make sure this isn't a truncating or extending cast. We could
// support this eventually, but don't bother for now.
if (!isa<VectorType>(I->getType()) &&
- TLI.getPointerTy().getSizeInBits() ==
- cast<IntegerType>(I->getType())->getBitWidth())
+ DL.getPointerSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
NoopInput = Op;
} else if (isa<TruncInst>(I) &&
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
@@ -331,14 +332,15 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
SmallVectorImpl<unsigned> &RetIndices,
SmallVectorImpl<unsigned> &CallIndices,
bool AllowDifferingSizes,
- const TargetLoweringBase &TLI) {
+ const TargetLoweringBase &TLI,
+ const DataLayout &DL) {
// Trace the sub-value needed by the return value as far back up the graph as
// possible, in the hope that it will intersect with the value produced by the
// call. In the simple case with no "returned" attribute, the hope is actually
// that we end up back at the tail call instruction itself.
unsigned BitsRequired = UINT_MAX;
- RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI);
+ RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);
// If this slot in the value returned is undef, it doesn't matter what the
// call puts there, it'll be fine.
@@ -350,7 +352,7 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
// a "returned" attribute, the search will be blocked immediately and the loop
// a Noop.
unsigned BitsProvided = UINT_MAX;
- CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI);
+ CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);
// There's no hope if we can't actually trace them to (the same part of!) the
// same value.
@@ -606,7 +608,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
- AllowDifferingSizes, TLI))
+ AllowDifferingSizes, TLI,
+ F->getParent()->getDataLayout()))
return false;
CallEmpty = !nextRealType(CallSubTypes, CallPath);
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 4cb460a7bbfc..0bad7954b980 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -69,24 +69,32 @@ void ARMException::beginFunction(const MachineFunction *MF) {
///
void ARMException::endFunction(const MachineFunction *MF) {
ARMTargetStreamer &ATS = getTargetStreamer();
+ const Function *F = MF->getFunction();
+ const Function *Per = nullptr;
+ if (F->hasPersonalityFn())
+ Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ assert(!MMI->getPersonality() || Per == MMI->getPersonality());
+ bool forceEmitPersonality =
+ F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ F->needsUnwindTableEntry();
+ bool shouldEmitPersonality = forceEmitPersonality ||
+ !MMI->getLandingPads().empty();
if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
- MMI->getLandingPads().empty())
+ !shouldEmitPersonality)
ATS.emitCantUnwind();
- else {
- if (!MMI->getLandingPads().empty()) {
- // Emit references to personality.
- if (const Function *Personality = MMI->getPersonality()) {
- MCSymbol *PerSym = Asm->getSymbol(Personality);
- Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global);
- ATS.emitPersonality(PerSym);
- }
-
- // Emit .handlerdata directive.
- ATS.emitHandlerData();
-
- // Emit actual exception table
- emitExceptionTable();
+ else if (shouldEmitPersonality) {
+ // Emit references to personality.
+ if (Per) {
+ MCSymbol *PerSym = Asm->getSymbol(Per);
+ Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global);
+ ATS.emitPersonality(PerSym);
}
+
+ // Emit .handlerdata directive.
+ ATS.emitHandlerData();
+
+ // Emit actual exception table
+ emitExceptionTable();
}
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
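The ARMException change above emits personality information either when landing pads survive or when the function carries a personality that is not a no-op without invokes and still wants an unwind table. The same decision reduced to booleans; inputs and names are illustrative.

#include <cassert>

bool shouldEmitPersonality(bool HasPersonalityFn, bool PersonalityIsNoOp,
                           bool NeedsUnwindTable, bool HasLandingPads) {
  // Force emission for an explicit, non-trivial personality on a function
  // that wants an unwind table entry, even with no landing pads left.
  bool Force = HasPersonalityFn && !PersonalityIsNoOp && NeedsUnwindTable;
  return Force || HasLandingPads;
}

int main() {
  // An explicit C++-style personality with no surviving invokes still gets
  // personality info as long as the function needs an unwind table.
  assert(shouldEmitPersonality(true, false, true, false));
  // Nothing to report: emit nothing (the ARM path emits .cantunwind instead).
  assert(!shouldEmitPersonality(false, false, false, false));
  return 0;
}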
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8a7e9f991611..125047e7bbb5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -820,7 +819,7 @@ void AsmPrinter::EmitFunctionBody() {
emitCFIInstruction(MI);
break;
- case TargetOpcode::FRAME_ALLOC:
+ case TargetOpcode::LOCAL_ESCAPE:
emitFrameAlloc(MI);
break;
@@ -1024,7 +1023,7 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit visibility info for declarations
for (const Function &F : M) {
- if (!F.isDeclaration())
+ if (!F.isDeclarationForLinker())
continue;
GlobalValue::VisibilityTypes V = F.getVisibility();
if (V == GlobalValue::DefaultVisibility)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 0bc873e326be..2c212c7ecee1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -89,6 +89,7 @@ void DwarfCFIException::endModule() {
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+ const Function *F = MF->getFunction();
// If any landing pads survive, we need an EH table.
bool hasLandingPads = !MMI->getLandingPads().empty();
@@ -104,10 +105,24 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = MMI->getPersonality();
-
- shouldEmitPersonality = hasLandingPads &&
- PerEncoding != dwarf::DW_EH_PE_omit && Per;
+ const Function *Per = nullptr;
+ if (F->hasPersonalityFn())
+ Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ assert(!MMI->getPersonality() || Per == MMI->getPersonality());
+
+ // Emit a personality function even when there are no landing pads
+ bool forceEmitPersonality =
+ // ...if a personality function is explicitly specified
+ F->hasPersonalityFn() &&
+ // ... and it's not known to be a noop in the absence of invokes
+ !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ // ... and we're not explicitly asked not to emit it
+ F->needsUnwindTableEntry();
+
+ shouldEmitPersonality =
+ (forceEmitPersonality ||
+ (hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit)) &&
+ Per;
unsigned LSDAEncoding = TLOF.getLSDAEncoding();
shouldEmitLSDA = shouldEmitPersonality &&
@@ -123,6 +138,11 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality)
return;
+ // If we are forced to emit this personality, make sure to record
+ // it because it might not appear in any landingpad
+ if (forceEmitPersonality)
+ MMI->addPersonality(Per);
+
const MCSymbol *Sym =
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 1c3e2aec64ab..01f34c6eb81c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -49,7 +49,7 @@ class DwarfUnit;
class MachineModuleInfo;
//===----------------------------------------------------------------------===//
-/// \brief This class is used to record source line correspondence.
+/// This class is used to record source line correspondence.
class SrcLineInfo {
unsigned Line; // Source line number.
unsigned Column; // Source column.
@@ -161,7 +161,7 @@ public:
return dwarf::DW_TAG_variable;
}
- /// \brief Return true if DbgVariable is artificial.
+ /// Return true if DbgVariable is artificial.
bool isArtificial() const {
if (Var->isArtificial())
return true;
@@ -190,149 +190,152 @@ public:
const DIType *getType() const;
private:
- /// resolve - Look in the DwarfDebug map for the MDNode that
+ /// Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
template <typename T> T *resolve(TypedDINodeRef<T> Ref) const;
};
-/// \brief Helper used to pair up a symbol and its DWARF compile unit.
+/// Helper used to pair up a symbol and its DWARF compile unit.
struct SymbolCU {
SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
const MCSymbol *Sym;
DwarfCompileUnit *CU;
};
-/// \brief Collects and handles dwarf debug information.
+/// Collects and handles dwarf debug information.
class DwarfDebug : public AsmPrinterHandler {
- // Target of Dwarf emission.
+ /// Target of Dwarf emission.
AsmPrinter *Asm;
- // Collected machine module information.
+ /// Collected machine module information.
MachineModuleInfo *MMI;
- // All DIEValues are allocated through this allocator.
+ /// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
- // Maps MDNode with its corresponding DwarfCompileUnit.
+ /// Maps MDNode with its corresponding DwarfCompileUnit.
MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
- // Maps subprogram MDNode with its corresponding DwarfCompileUnit.
+ /// Maps subprogram MDNode with its corresponding DwarfCompileUnit.
MapVector<const MDNode *, DwarfCompileUnit *> SPMap;
- // Maps a CU DIE with its corresponding DwarfCompileUnit.
+ /// Maps a CU DIE with its corresponding DwarfCompileUnit.
DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
- // List of all labels used in aranges generation.
+ /// List of all labels used in aranges generation.
std::vector<SymbolCU> ArangeLabels;
- // Size of each symbol emitted (for those symbols that have a specific size).
+ /// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
LexicalScopes LScopes;
- // Collection of abstract variables.
+ /// Collection of abstract variables.
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
- // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
- // can refer to them in spite of insertions into this list.
+ /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
+ /// can refer to them in spite of insertions into this list.
DebugLocStream DebugLocs;
- // This is a collection of subprogram MDNodes that are processed to
- // create DIEs.
+ /// This is a collection of subprogram MDNodes that are processed to
+ /// create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
- // Maps instruction with label emitted before instruction.
+ /// Maps instruction with label emitted before instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
- // Maps instruction with label emitted after instruction.
+ /// Maps instruction with label emitted after instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
- // History of DBG_VALUE and clobber instructions for each user variable.
- // Variables are listed in order of appearance.
+ /// History of DBG_VALUE and clobber instructions for each user
+ /// variable. Variables are listed in order of appearance.
DbgValueHistoryMap DbgValues;
- // Previous instruction's location information. This is used to determine
- // label location to indicate scope boundries in dwarf debug info.
+ /// Previous instruction's location information. This is used to
+  /// determine label location to indicate scope boundaries in dwarf
+ /// debug info.
DebugLoc PrevInstLoc;
MCSymbol *PrevLabel;
- // This location indicates end of function prologue and beginning of function
- // body.
+ /// This location indicates end of function prologue and beginning of
+ /// function body.
DebugLoc PrologEndLoc;
- // If nonnull, stores the current machine function we're processing.
+ /// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn;
- // If nonnull, stores the current machine instruction we're processing.
+ /// If nonnull, stores the current machine instruction we're processing.
const MachineInstr *CurMI;
- // If nonnull, stores the CU in which the previous subprogram was contained.
+ /// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
- // As an optimization, there is no need to emit an entry in the directory
- // table for the same directory as DW_AT_comp_dir.
+ /// As an optimization, there is no need to emit an entry in the directory
+ /// table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
- // Holder for the file specific debug information.
+ /// Holder for the file specific debug information.
DwarfFile InfoHolder;
- // Holders for the various debug information flags that we might need to
- // have exposed. See accessor functions below for description.
+ /// Holders for the various debug information flags that we might need to
+ /// have exposed. See accessor functions below for description.
- // Holder for imported entities.
+ /// Holder for imported entities.
typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
ImportedEntityMap;
ImportedEntityMap ScopesWithImportedEntities;
- // Map from MDNodes for user-defined types to the type units that describe
- // them.
+ /// Map from MDNodes for user-defined types to the type units that
+ /// describe them.
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
SmallVector<
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
- // Whether to emit the pubnames/pubtypes sections.
+ /// Whether to emit the pubnames/pubtypes sections.
bool HasDwarfPubSections;
- // Whether or not to use AT_ranges for compilation units.
+ /// Whether or not to use AT_ranges for compilation units.
bool HasCURanges;
- // Whether we emitted a function into a section other than the default
- // text.
+ /// Whether we emitted a function into a section other than the
+ /// default text.
bool UsedNonDefaultText;
- // Whether to use the GNU TLS opcode (instead of the standard opcode).
+ /// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
- // Version of dwarf we're emitting.
+ /// Version of dwarf we're emitting.
unsigned DwarfVersion;
- // Maps from a type identifier to the actual MDNode.
+ /// Maps from a type identifier to the actual MDNode.
DITypeIdentifierMap TypeIdentifierMap;
- // DWARF5 Experimental Options
+ /// DWARF5 Experimental Options
+ /// @{
bool HasDwarfAccelTables;
bool HasSplitDwarf;
- // Separated Dwarf Variables
- // In general these will all be for bits that are left in the
- // original object file, rather than things that are meant
- // to be in the .dwo sections.
+ /// Separated Dwarf Variables
+ /// In general these will all be for bits that are left in the
+ /// original object file, rather than things that are meant
+ /// to be in the .dwo sections.
- // Holder for the skeleton information.
+ /// Holder for the skeleton information.
DwarfFile SkeletonHolder;
- /// Store file names for type units under fission in a line table header that
- /// will be emitted into debug_line.dwo.
- // FIXME: replace this with a map from comp_dir to table so that we can emit
- // multiple tables during LTO each of which uses directory 0, referencing the
- // comp_dir of all the type units that use it.
+ /// Store file names for type units under fission in a line table
+ /// header that will be emitted into debug_line.dwo.
+ // FIXME: replace this with a map from comp_dir to table so that we
+ // can emit multiple tables during LTO each of which uses directory
+ // 0, referencing the comp_dir of all the type units that use it.
MCDwarfDwoLineTable SplitTypeUnitFileTable;
-
- // True iff there are multiple CUs in this module.
+ /// @}
+
+ /// True iff there are multiple CUs in this module.
bool SingleCU;
bool IsDarwin;
bool IsPS4;
@@ -354,7 +357,7 @@ class DwarfDebug : public AsmPrinterHandler {
typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
- /// \brief Find abstract variable associated with Var.
+ /// Find abstract variable associated with Var.
DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
const DILocalVariable *&Cleansed);
DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
@@ -366,56 +369,56 @@ class DwarfDebug : public AsmPrinterHandler {
DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV);
- /// \brief Construct a DIE for this abstract scope.
+ /// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ /// Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
- /// \brief Compute the size and offset of all the DIEs.
+ /// Compute the size and offset of all the DIEs.
void computeSizeAndOffsets();
- /// \brief Collect info for variables that were optimized out.
+ /// Collect info for variables that were optimized out.
void collectDeadVariables();
void finishVariableDefinitions();
void finishSubprogramDefinitions();
- /// \brief Finish off debug information after all functions have been
+ /// Finish off debug information after all functions have been
/// processed.
void finalizeModuleInfo();
- /// \brief Emit the debug info section.
+ /// Emit the debug info section.
void emitDebugInfo();
- /// \brief Emit the abbreviation section.
+ /// Emit the abbreviation section.
void emitAbbreviations();
- /// \brief Emit a specified accelerator table.
+ /// Emit a specified accelerator table.
void emitAccel(DwarfAccelTable &Accel, MCSection *Section,
StringRef TableName);
- /// \brief Emit visible names into a hashed accelerator table section.
+ /// Emit visible names into a hashed accelerator table section.
void emitAccelNames();
- /// \brief Emit objective C classes and categories into a hashed
+ /// Emit objective C classes and categories into a hashed
/// accelerator table section.
void emitAccelObjC();
- /// \brief Emit namespace dies into a hashed accelerator table.
+ /// Emit namespace dies into a hashed accelerator table.
void emitAccelNamespaces();
- /// \brief Emit type dies into a hashed accelerator table.
+ /// Emit type dies into a hashed accelerator table.
void emitAccelTypes();
- /// \brief Emit visible names into a debug pubnames section.
+ /// Emit visible names into a debug pubnames section.
/// \param GnuStyle determines whether or not we want to emit
/// additional information into the table ala newer gcc for gdb
/// index.
void emitDebugPubNames(bool GnuStyle = false);
- /// \brief Emit visible types into a debug pubtypes section.
+ /// Emit visible types into a debug pubtypes section.
/// \param GnuStyle determines whether or not we want to emit
/// additional information into the table ala newer gcc for gdb
/// index.
@@ -425,91 +428,91 @@ class DwarfDebug : public AsmPrinterHandler {
bool GnuStyle, MCSection *PSec, StringRef Name,
const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
- /// \brief Emit visible names into a debug str section.
+ /// Emit visible names into a debug str section.
void emitDebugStr();
- /// \brief Emit visible names into a debug loc section.
+ /// Emit visible names into a debug loc section.
void emitDebugLoc();
- /// \brief Emit visible names into a debug loc dwo section.
+ /// Emit visible names into a debug loc dwo section.
void emitDebugLocDWO();
- /// \brief Emit visible names into a debug aranges section.
+ /// Emit visible names into a debug aranges section.
void emitDebugARanges();
- /// \brief Emit visible names into a debug ranges section.
+ /// Emit visible names into a debug ranges section.
void emitDebugRanges();
- /// \brief Emit inline info using custom format.
+ /// Emit inline info using custom format.
void emitDebugInlineInfo();
/// DWARF 5 Experimental Split Dwarf Emitters
- /// \brief Initialize common features of skeleton units.
+ /// Initialize common features of skeleton units.
void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfUnit> NewU);
- /// \brief Construct the split debug info compile unit for the debug info
+ /// Construct the split debug info compile unit for the debug info
/// section.
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
- /// \brief Construct the split debug info compile unit for the debug info
+ /// Construct the split debug info compile unit for the debug info
/// section.
DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU);
- /// \brief Emit the debug info dwo section.
+ /// Emit the debug info dwo section.
void emitDebugInfoDWO();
- /// \brief Emit the debug abbrev dwo section.
+ /// Emit the debug abbrev dwo section.
void emitDebugAbbrevDWO();
- /// \brief Emit the debug line dwo section.
+ /// Emit the debug line dwo section.
void emitDebugLineDWO();
- /// \brief Emit the debug str dwo section.
+ /// Emit the debug str dwo section.
void emitDebugStrDWO();
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
void addGnuPubAttributes(DwarfUnit &U, DIE &D) const;
- /// \brief Create new DwarfCompileUnit for the given metadata node with tag
+ /// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit);
- /// \brief Construct imported_module or imported_declaration DIE.
+ /// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity *N);
- /// \brief Register a source line with debug info. Returns the unique
+ /// Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
/// source line list.
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
- /// \brief Indentify instructions that are marking the beginning of or
+  /// Identify instructions that are marking the beginning of or
/// ending of a scope.
void identifyScopeMarkers();
- /// \brief Populate LexicalScope entries with variables' info.
+ /// Populate LexicalScope entries with variables' info.
void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
DenseSet<InlinedVariable> &ProcessedVars);
- /// \brief Build the location list for all DBG_VALUEs in the
+ /// Build the location list for all DBG_VALUEs in the
/// function that describe the same variable.
void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::InstrRanges &Ranges);
- /// \brief Collect variable information from the side table maintained
+ /// Collect variable information from the side table maintained
/// by MMI.
void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P);
- /// \brief Ensure that a label will be emitted before MI.
+ /// Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
}
- /// \brief Ensure that a label will be emitted after MI.
+ /// Ensure that a label will be emitted after MI.
void requestLabelAfterInsn(const MachineInstr *MI) {
LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
}
@@ -522,50 +525,50 @@ public:
~DwarfDebug() override;
- /// \brief Emit all Dwarf sections that should come prior to the
+ /// Emit all Dwarf sections that should come prior to the
/// content.
void beginModule();
- /// \brief Emit all Dwarf sections that should come after the content.
+ /// Emit all Dwarf sections that should come after the content.
void endModule() override;
- /// \brief Gather pre-function debug information.
+ /// Gather pre-function debug information.
void beginFunction(const MachineFunction *MF) override;
- /// \brief Gather and emit post-function debug information.
+ /// Gather and emit post-function debug information.
void endFunction(const MachineFunction *MF) override;
- /// \brief Process beginning of an instruction.
+ /// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
- /// \brief Process end of an instruction.
+ /// Process end of an instruction.
void endInstruction() override;
- /// \brief Add a DIE to the set of types that we're going to pull into
+ /// Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
- /// \brief Add a label so that arange data can be generated for it.
+ /// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
- /// \brief For symbols that have a size designated (e.g. common symbols),
+ /// For symbols that have a size designated (e.g. common symbols),
/// this tracks that size.
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {
SymSize[Sym] = Size;
}
- /// \brief Returns whether to use DW_OP_GNU_push_tls_address, instead of the
+ /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the
/// standard DW_OP_form_tls_address opcode
bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
// Experimental DWARF5 features.
- /// \brief Returns whether or not to emit tables that dwarf consumers can
+ /// Returns whether or not to emit tables that dwarf consumers can
/// use to accelerate lookup.
bool useDwarfAccelTables() const { return HasDwarfAccelTables; }
- /// \brief Returns whether or not to change the current debug info for the
+ /// Returns whether or not to change the current debug info for the
/// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; }
@@ -579,7 +582,7 @@ public:
/// Returns the entries for the .debug_loc section.
const DebugLocStream &getDebugLocs() const { return DebugLocs; }
- /// \brief Emit an entry for the debug loc section. This can be used to
+ /// Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
void emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocStream::Entry &Entry);
@@ -592,7 +595,7 @@ public:
return Ref.resolve(TypeIdentifierMap);
}
- /// \brief Return the TypeIdentifierMap.
+ /// Return the TypeIdentifierMap.
const DITypeIdentifierMap &getTypeIdentifierMap() const {
return TypeIdentifierMap;
}
@@ -627,14 +630,14 @@ public:
less_first()));
}
- /// \brief A helper function to check whether the DIE for a given Scope is
+ /// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
- /// \brief Return Label preceding the instruction.
+ /// Return Label preceding the instruction.
MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
- /// \brief Return Label immediately following the instruction.
+ /// Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
// FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 4000ae48a856..44d9d2245dda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -113,7 +113,7 @@ protected:
DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
- /// \brief Add a string attribute data and value.
+ /// Add a string attribute data and value.
///
/// This is guaranteed to be in the local string pool instead of indirected.
void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
@@ -142,10 +142,10 @@ public:
unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
- /// \brief Return true if this compile unit has something to write out.
+ /// Return true if this compile unit has something to write out.
bool hasContent() const { return UnitDie.hasChildren(); }
- /// \brief Get string containing language specific context for a global name.
+ /// Get string containing language specific context for a global name.
///
/// Walks the metadata parent chain in a language specific manner (using the
/// compile unit language) and returns it as a string. This is done at the
@@ -162,42 +162,42 @@ public:
virtual void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {}
- /// \brief Add a new name to the namespace accelerator table.
+ /// Add a new name to the namespace accelerator table.
void addAccelNamespace(StringRef Name, const DIE &Die);
- /// \brief Returns the DIE map slot for the specified debug variable.
+ /// Returns the DIE map slot for the specified debug variable.
///
/// We delegate the request to DwarfDebug when the MDNode can be part of the
/// type system, since DIEs for the type system can be shared across CUs and
/// the mappings are kept in DwarfDebug.
DIE *getDIE(const DINode *D) const;
- /// \brief Returns a fresh newly allocated DIELoc.
+ /// Returns a fresh newly allocated DIELoc.
DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; }
- /// \brief Insert DIE into the map.
+ /// Insert DIE into the map.
///
/// We delegate the request to DwarfDebug when the MDNode can be part of the
/// type system, since DIEs for the type system can be shared across CUs and
/// the mappings are kept in DwarfDebug.
void insertDIE(const DINode *Desc, DIE *D);
- /// \brief Add a flag that is true to the DIE.
+ /// Add a flag that is true to the DIE.
void addFlag(DIE &Die, dwarf::Attribute Attribute);
- /// \brief Add an unsigned integer attribute data and value.
+ /// Add an unsigned integer attribute data and value.
void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
uint64_t Integer);
void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer);
- /// \brief Add an signed integer attribute data and value.
+  /// Add a signed integer attribute data and value.
void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
int64_t Integer);
void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
- /// \brief Add a string attribute data and value.
+ /// Add a string attribute data and value.
///
/// We always emit a reference to the string pool instead of immediate
/// strings so that DIEs have more predictable sizes. In the case of split
@@ -205,38 +205,38 @@ public:
/// into the string table.
void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
- /// \brief Add a Dwarf label attribute data and value.
+ /// Add a Dwarf label attribute data and value.
DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute,
dwarf::Form Form, const MCSymbol *Label);
void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
- /// \brief Add an offset into a section attribute data and value.
+ /// Add an offset into a section attribute data and value.
void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer);
- /// \brief Add a dwarf op address data and value using the form given and an
+ /// Add a dwarf op address data and value using the form given and an
/// op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addOpAddress(DIELoc &Die, const MCSymbol *Label);
- /// \brief Add a label delta attribute data and value.
+ /// Add a label delta attribute data and value.
void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
const MCSymbol *Lo);
- /// \brief Add a DIE attribute data and value.
+ /// Add a DIE attribute data and value.
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
- /// \brief Add a DIE attribute data and value.
+ /// Add a DIE attribute data and value.
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
- /// \brief Add block data.
+ /// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
- /// \brief Add block data.
+ /// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
- /// \brief Add location information to specified debug information entry.
+ /// Add location information to specified debug information entry.
void addSourceLine(DIE &Die, unsigned Line, StringRef File,
StringRef Directory);
void addSourceLine(DIE &Die, const DILocalVariable *V);
@@ -246,30 +246,30 @@ public:
void addSourceLine(DIE &Die, const DINamespace *NS);
void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
- /// \brief Add constant value entry in variable DIE.
+ /// Add constant value entry in variable DIE.
void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty);
void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
- /// \brief Add constant value entry in variable DIE.
+ /// Add constant value entry in variable DIE.
void addConstantFPValue(DIE &Die, const MachineOperand &MO);
void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
- /// \brief Add a linkage name, if it isn't empty.
+ /// Add a linkage name, if it isn't empty.
void addLinkageName(DIE &Die, StringRef LinkageName);
- /// \brief Add template parameters in buffer.
+ /// Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DINodeArray TParams);
- /// \brief Add register operand.
+ /// Add register operand.
/// \returns false if the register does not exist, e.g., because it was never
/// materialized.
bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
unsigned SizeInBits = 0, unsigned OffsetInBits = 0);
- /// \brief Add register offset.
+ /// Add register offset.
/// \returns false if the register does not exist, e.g., because it was never
/// materialized.
bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
@@ -283,7 +283,7 @@ public:
dwarf::Attribute Attribute,
const MachineLocation &Location);
- /// \brief Add a new type attribute to the specified entity.
+ /// Add a new type attribute to the specified entity.
///
/// This takes an attribute parameter because DW_AT_friend attributes are
/// also type references.
@@ -297,19 +297,19 @@ public:
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool Minimal = false);
- /// \brief Find existing DIE or create new DIE for the given type.
+ /// Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *N);
- /// \brief Get context owner's DIE.
+ /// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
- /// \brief Get context owner's DIE.
+ /// Get context owner's DIE.
DIE *getOrCreateContextDIE(const DIScope *Context);
- /// \brief Construct DIEs for types that contain vtables.
+ /// Construct DIEs for types that contain vtables.
void constructContainingTypeDIEs();
- /// \brief Construct function argument DIEs.
+ /// Construct function argument DIEs.
void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args);
/// Create a DIE with the given Tag, add the DIE to its parent, and
@@ -332,14 +332,14 @@ public:
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
protected:
- /// \brief Create new static data member DIE.
+ /// Create new static data member DIE.
DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
/// Look up the source ID with the given directory and source file names. If
/// none currently exists, create a new ID and insert it in the line table.
virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
- /// \brief Look in the DwarfDebug map for the MDNode that corresponds to the
+ /// Look in the DwarfDebug map for the MDNode that corresponds to the
/// reference.
template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
return DD->resolve(Ref);
@@ -358,15 +358,15 @@ private:
void constructTemplateValueParameterDIE(DIE &Buffer,
const DITemplateValueParameter *TVP);
- /// \brief Return the default lower bound for an array.
+ /// Return the default lower bound for an array.
///
/// If the DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
- /// \brief Get an anonymous type for index type.
+ /// Get an anonymous type for index type.
DIE *getIndexTyDie();
- /// \brief Set D as anonymous type for index which can be reused later.
+ /// Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
/// If this is a named finished type then include it in the list of types for
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 1be3fd74d602..49ef8d3ddc8f 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing && !IsSJLJ) {
+ if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) {
CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
CallSites.push_back(Site);
}
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 535b1f605853..6610ac78f8c4 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -97,7 +97,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
Asm->OutStreamer->EmitLabel(MCL);
CurFn->Instrs.push_back(MCL);
- InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine());
+ InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol());
}
WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
@@ -264,22 +264,38 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
// Identify the function this subsection is for.
Asm->OutStreamer->EmitCOFFSecRel32(Fn);
Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
- // Insert padding after a 16-bit section index.
- Asm->EmitInt16(0);
+ // Insert flags after a 16-bit section index.
+ Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS);
// Length of the function's code, in bytes.
EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
// PC-to-linenumber lookup table:
MCSymbol *FileSegmentEnd = nullptr;
+
+ // The start of the last segment:
+ size_t LastSegmentStart = 0;
+
+ auto FinishPreviousChunk = [&] {
+ if (!FileSegmentEnd)
+ return;
+ for (size_t ColSegI = LastSegmentStart,
+ ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
+ ColSegI != ColSegEnd; ++ColSegI) {
+ unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber;
+ Asm->EmitInt16(ColumnNumber); // Start column
+ Asm->EmitInt16(ColumnNumber); // End column
+ }
+ Asm->OutStreamer->EmitLabel(FileSegmentEnd);
+ };
+
for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
MCSymbol *Instr = FI.Instrs[J];
assert(InstrInfo.count(Instr));
if (FilenameSegmentLengths.count(J)) {
// We came to a beginning of a new filename segment.
- if (FileSegmentEnd)
- Asm->OutStreamer->EmitLabel(FileSegmentEnd);
+ FinishPreviousChunk();
StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
assert(FileNameRegistry.Infos.count(CurFilename));
size_t IndexInStringTable =
@@ -300,6 +316,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
// records.
FileSegmentEnd = Asm->MMI->getContext().createTempSymbol();
EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
+ LastSegmentStart = J;
}
// The first PC with the given linenumber and the linenumber itself.
@@ -307,8 +324,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
Asm->EmitInt32(InstrInfo[Instr].LineNumber);
}
- if (FileSegmentEnd)
- Asm->OutStreamer->EmitLabel(FileSegmentEnd);
+ FinishPreviousChunk();
Asm->OutStreamer->EmitLabel(LineTableEnd);
}
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index a5b399f73707..43d1a432712e 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -52,11 +52,13 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler {
struct InstrInfoTy {
StringRef Filename;
unsigned LineNumber;
+ unsigned ColumnNumber;
- InstrInfoTy() : LineNumber(0) {}
+ InstrInfoTy() : LineNumber(0), ColumnNumber(0) {}
- InstrInfoTy(StringRef Filename, unsigned LineNumber)
- : Filename(Filename), LineNumber(LineNumber) {}
+ InstrInfoTy(StringRef Filename, unsigned LineNumber, unsigned ColumnNumber)
+ : Filename(Filename), LineNumber(LineNumber),
+ ColumnNumber(ColumnNumber) {}
};
DenseMap<MCSymbol *, InstrInfoTy> InstrInfo;
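The two hunks above teach the CodeView line-table writer to track a column for every instruction and to append start/end column pairs after each file segment's offset/line records. A minimal sketch of that per-segment record layout, with illustrative types rather than the real CodeView streamer, is:

#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative record; the real table keys instructions by MCSymbol.
struct InstrRecord { uint32_t Offset, Line; uint16_t Column; };

static std::vector<uint8_t> emitSegment(const std::vector<InstrRecord> &Instrs) {
  std::vector<uint8_t> Out;
  auto emit = [&Out](uint64_t V, unsigned Bytes) {
    for (unsigned i = 0; i != Bytes; ++i)
      Out.push_back(uint8_t(V >> (8 * i))); // little-endian helper
  };
  for (const auto &I : Instrs) { // existing 32-bit offset/line pairs first
    emit(I.Offset, 4);
    emit(I.Line, 4);
  }
  for (const auto &I : Instrs) { // then the new 16-bit column pairs
    emit(I.Column, 2);           // start column
    emit(I.Column, 2);           // end column (same as start, as in the patch)
  }
  return Out;
}

int main() {
  auto Bytes = emitSegment({{0x0, 10, 3}, {0x8, 11, 5}});
  std::cout << Bytes.size() << " bytes\n"; // 2*8 line records + 2*4 column records = 24
}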
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 79830bc3443b..71c77815e281 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -70,19 +70,27 @@ void WinException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = MMI->getPersonality();
+ const Function *Per = nullptr;
+ if (F->hasPersonalityFn())
+ Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
- shouldEmitPersonality = hasLandingPads &&
- PerEncoding != dwarf::DW_EH_PE_omit && Per;
+ bool forceEmitPersonality =
+ F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ F->needsUnwindTableEntry();
+
+ shouldEmitPersonality = forceEmitPersonality || (hasLandingPads &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per);
unsigned LSDAEncoding = TLOF.getLSDAEncoding();
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- // If we're not using CFI, we don't want the CFI or the personality. Emit the
- // LSDA if this is the parent function.
+ // If we're not using CFI, we don't want the CFI or the personality. If
+ // WinEHPrepare outlined something, we should emit the LSDA.
if (!Asm->MAI->usesWindowsCFI()) {
- shouldEmitLSDA = (hasLandingPads && F == ParentF);
+ bool HasOutlinedChildren =
+ F->hasFnAttribute("wineh-parent") && F == ParentF;
+ shouldEmitLSDA = HasOutlinedChildren;
shouldEmitPersonality = false;
return;
}
@@ -121,7 +129,10 @@ void WinException::endFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA)
return;
- EHPersonality Per = MMI->getPersonalityType();
+ const Function *F = MF->getFunction();
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
// Get rid of any dead landing pads if we're not using a Windows EH scheme. In
// Windows EH schemes, the landing pad is not actually reachable. It only
@@ -350,6 +361,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// EHFlags & 1 -> Synchronous exceptions only, no async exceptions.
// EHFlags & 2 -> ???
// EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
+ OS.EmitValueToAlignment(4);
OS.EmitLabel(FuncInfoXData);
OS.EmitIntValue(0x19930522, 4); // MagicNumber
OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState
@@ -555,7 +567,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
// we've code generated the parent, we can emit the label assignment that
// those helpers use to get the offset of the registration node.
assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX &&
- "no EH reg node frameescape index");
+ "no EH reg node localescape index");
MCSymbol *ParentFrameOffset =
Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName);
MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol(
@@ -578,9 +590,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// Emit the __ehtable label that we use for llvm.x86.seh.lsda.
MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
+ OS.EmitValueToAlignment(4);
OS.EmitLabel(LSDALabel);
- const Function *Per = MMI->getPersonality();
+ const Function *Per =
+ dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
StringRef PerName = Per->getName();
int BaseState = -1;
if (PerName == "_except_handler4") {
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 82f5c482408a..db00910cd018 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -34,4 +34,5 @@ cl::opt<unsigned>
cl::Hidden);
BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 70de4e7ebd11..6ab6acc03722 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -147,10 +147,13 @@ class TypePromotionTransaction;
/// OptSize - True if optimizing for size.
bool OptSize;
+ /// DataLayout for the Function being processed.
+ const DataLayout *DL;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -203,6 +206,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ DL = &F.getParent()->getDataLayout();
+
bool EverMadeChange = false;
// Clear per function information.
InsertedInsts.clear();
@@ -753,10 +758,11 @@ static bool SinkCast(CastInst *CI) {
///
/// Return true if any changes are made.
///
-static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
+ const DataLayout &DL) {
// If this is a noop copy,
- EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(CI->getType());
+ EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, CI->getType());
// This is an fp<->int conversion?
if (SrcVT.isInteger() != DstVT.isInteger())
@@ -921,7 +927,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) {
static bool
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, const DataLayout &DL) {
BasicBlock *UserBB = User->getParent();
DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
TruncInst *TruncI = dyn_cast<TruncInst>(User);
@@ -947,7 +953,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
// approximation; some nodes' legality is determined by the
// operand or other means. There's no good way to find out though.
if (TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
+ ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
continue;
// Don't bother for PHI nodes.
@@ -1005,13 +1011,14 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
/// instruction.
/// Return true if any changes are made.
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI,
+ const DataLayout &DL) {
BasicBlock *DefBB = ShiftI->getParent();
/// Only insert instructions in each block once.
DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
- bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
+ bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
bool MadeChange = false;
for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
@@ -1048,9 +1055,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
if (isa<TruncInst>(User) && shiftIsLegal
// If the type of the truncate is legal, no truncate will be
// introduced in other basic blocks.
- && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
+ &&
+ (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
MadeChange =
- SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
+ SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
continue;
}
@@ -1307,12 +1315,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
- const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
-
// Align the pointer arguments to this call if the target thinks it's a good
// idea
unsigned MinSize, PrefAlign;
- if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->arg_operands()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
@@ -1320,36 +1326,34 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// if size - offset meets the size threshold.
if (!Arg->getType()->isPointerTy())
continue;
- APInt Offset(TD->getPointerSizeInBits(
- cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
- Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ APInt Offset(DL->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()),
+ 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
uint64_t Offset2 = Offset.getLimitedValue();
if ((Offset2 & (PrefAlign-1)) != 0)
continue;
AllocaInst *AI;
- if ((AI = dyn_cast<AllocaInst>(Val)) &&
- AI->getAlignment() < PrefAlign &&
- TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
+ DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
AI->setAlignment(PrefAlign);
// Global variables can only be aligned if they are defined in this
// object (i.e. they are uniquely initialized in this object), and
// over-aligning global variables that have an explicit section is
// forbidden.
GlobalVariable *GV;
- if ((GV = dyn_cast<GlobalVariable>(Val)) &&
- GV->hasUniqueInitializer() &&
- !GV->hasSection() &&
- GV->getAlignment() < PrefAlign &&
- TD->getTypeAllocSize(
- GV->getType()->getElementType()) >= MinSize + Offset2)
+ if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() &&
+ !GV->hasSection() && GV->getAlignment() < PrefAlign &&
+ DL->getTypeAllocSize(GV->getType()->getElementType()) >=
+ MinSize + Offset2)
GV->setAlignment(PrefAlign);
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
- unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ unsigned Align = getKnownAlignment(MI->getDest(), *DL);
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
- Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
if (Align > MI->getAlignment())
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
}
@@ -2099,6 +2103,7 @@ class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
const TargetMachine &TM;
const TargetLowering &TLI;
+ const DataLayout &DL;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
@@ -2131,8 +2136,9 @@ class AddressingModeMatcher {
: AddrModeInsts(AMI), TM(TM),
TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
->getTargetLowering()),
- AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
- InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT) {
+ DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
+ MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
+ PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
public:
@@ -2199,7 +2205,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
TestAddrMode.ScaledReg = ScaleReg;
// If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace))
+ if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
return false;
// It was legal, so commit it.
@@ -2216,7 +2222,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
// If this addressing mode is legal, commit it and remember that we folded
// this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) {
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
AddrMode = TestAddrMode;
return true;
@@ -2262,7 +2268,8 @@ static bool MightBeFoldableInst(Instruction *I) {
/// \note \p Val is assumed to be the product of some type promotion.
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
/// to be legal, as the non-promoted value would have had the same state.
-static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
+static bool isPromotedInstructionLegal(const TargetLowering &TLI,
+ const DataLayout &DL, Value *Val) {
Instruction *PromotedInst = dyn_cast<Instruction>(Val);
if (!PromotedInst)
return false;
@@ -2272,7 +2279,7 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
return true;
// Otherwise, check if the promoted instruction is legal or not.
return TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+ ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
}
/// \brief Helper class to perform type promotion.
@@ -2646,7 +2653,7 @@ bool AddressingModeMatcher::IsPromotionProfitable(
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
// Check that we did not create an illegal instruction.
- return isPromotedInstructionLegal(TLI, PromotedOperand);
+ return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
}
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
@@ -2674,12 +2681,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
return MatchAddr(AddrInst->getOperand(0), Depth);
- case Instruction::IntToPtr:
+ case Instruction::IntToPtr: {
+ auto AS = AddrInst->getType()->getPointerAddressSpace();
+ auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
// This inttoptr is a no-op if the integer type is pointer sized.
- if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
+ if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
+ }
case Instruction::BitCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
@@ -2752,16 +2761,15 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned VariableScale = 0;
int64_t ConstantOffset = 0;
- const DataLayout *TD = TLI.getDataLayout();
gep_type_iterator GTI = gep_type_begin(AddrInst);
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx =
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
ConstantOffset += SL->getElementOffset(Idx);
} else {
- uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
ConstantOffset += CI->getSExtValue()*TypeSize;
} else if (TypeSize) { // Scales of zero don't do anything.
@@ -2781,7 +2789,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
if (ConstantOffset == 0 ||
- TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) {
+ TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
if (MatchAddr(AddrInst->getOperand(0), Depth+1))
return true;
@@ -2904,14 +2912,14 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
// Fold in immediates if legal for the target.
AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseOffs -= CI->getSExtValue();
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
// If this is a global variable, try to fold it into the addressing mode.
if (!AddrMode.BaseGV) {
AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseGV = nullptr;
}
@@ -2955,7 +2963,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
AddrMode.HasBaseReg = true;
AddrMode.BaseReg = Addr;
// Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.HasBaseReg = false;
AddrMode.BaseReg = nullptr;
@@ -2965,7 +2973,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (AddrMode.Scale == 0) {
AddrMode.Scale = 1;
AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.Scale = 0;
AddrMode.ScaledReg = nullptr;
@@ -2984,7 +2992,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
+ TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
+ ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -3324,7 +3333,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
- Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *ResultPtr = nullptr, *ResultIndex = nullptr;
// First, find the pointer.
@@ -3443,7 +3452,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
- Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *Result = nullptr;
// Start with the base register. Do this first so that subsequent address
@@ -3545,8 +3554,8 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
const TargetRegisterInfo *TRI =
TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
- TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(TRI, CS);
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(*DL, TRI, CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -3680,7 +3689,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
TotalCreatedInstsCost -= ExtCost;
if (!StressExtLdPromotion &&
(TotalCreatedInstsCost > 1 ||
- !isPromotedInstructionLegal(*TLI, PromotedVal))) {
+ !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
// The promotion is not profitable, rollback to the previous state.
TPT.rollback(LastKnownGood);
continue;
@@ -3735,8 +3744,8 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
if (!HasPromoted && LI->getParent() == I->getParent())
return false;
- EVT VT = TLI->getValueType(I->getType());
- EVT LoadVT = TLI->getValueType(LI->getType());
+ EVT VT = TLI->getValueType(*DL, I->getType());
+ EVT LoadVT = TLI->getValueType(*DL, LI->getType());
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
@@ -4013,6 +4022,9 @@ namespace {
/// Assuming both extractelement and store can be combine, we get rid of the
/// transition.
class VectorPromoteHelper {
+ /// DataLayout associated with the current module.
+ const DataLayout &DL;
+
/// Used to perform some checks on the legality of vector operations.
const TargetLowering &TLI;
@@ -4086,7 +4098,8 @@ class VectorPromoteHelper {
unsigned Align = ST->getAlignment();
// Check if this store is supported.
if (!TLI.allowsMisalignedMemoryAccesses(
- TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
+ TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
+ Align)) {
// If this is not supported, there is no way we can combine
// the extract with the store.
return false;
@@ -4181,9 +4194,10 @@ class VectorPromoteHelper {
}
public:
- VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
- Instruction *Transition, unsigned CombineCost)
- : TLI(TLI), TTI(TTI), Transition(Transition),
+ VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
+ const TargetTransformInfo &TTI, Instruction *Transition,
+ unsigned CombineCost)
+ : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
assert(Transition && "Do not know how to promote null");
}
@@ -4219,7 +4233,7 @@ public:
return false;
return StressStoreExtract ||
TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(getTransitionType(), true));
+ ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
}
/// \brief Check whether or not \p Use can be combined
@@ -4323,7 +4337,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
// we do not do that for now.
BasicBlock *Parent = Inst->getParent();
DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
- VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
+ VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
// If the transition has more than one use, assume this is not going to be
// beneficial.
while (Inst->hasOneUse()) {
@@ -4368,8 +4382,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- const DataLayout &DL = I->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
+ if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -4388,15 +4401,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
if (isa<Constant>(CI->getOperand(0)))
return false;
- if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
/// Sink a zext or sext into its user blocks if the target type doesn't
/// fit in one register
- if (TLI && TLI->getTypeAction(CI->getContext(),
- TLI->getValueType(CI->getType())) ==
- TargetLowering::TypeExpandInteger) {
+ if (TLI &&
+ TLI->getTypeAction(CI->getContext(),
+ TLI->getValueType(*DL, CI->getType())) ==
+ TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
bool MadeChange = MoveExtToFormExtLoad(I);
@@ -4433,7 +4447,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (TLI && CI && TLI->hasExtractBitsInsn())
- return OptimizeExtractBits(BinOp, CI, *TLI);
+ return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
return false;
}
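The CodeGenPrepare hunks above replace every TLI->getDataLayout() query with a DataLayout pointer the pass caches itself. A minimal sketch of that pattern, assuming a pass member named DL (the pass and member names here are illustrative, not the exact CodeGenPrepare code):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

namespace {
struct ExamplePass {
  const llvm::DataLayout *DL = nullptr;

  bool runOnFunction(llvm::Function &F) {
    // Cache the module's DataLayout once per function...
    DL = &F.getParent()->getDataLayout();
    // ...and reuse it at every query site, e.g.:
    //   Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
    return false;
  }
};
} // end anonymous namespace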
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 963d573ea7f0..941129b5cc95 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -60,7 +60,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
return false;
// Don't delete frame allocation labels.
- if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC)
+ if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE)
return false;
// Don't delete instructions with side effects.
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 5b09cf1a0fd7..201f9c150083 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -733,12 +733,14 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
+ const MachineRegisterInfo &MRI = mf.getRegInfo();
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
- I != E; ++I)
- if (MF->getRegInfo().isPhysRegUsed(*I)) {
- anyregs = true;
- break;
- }
+ I != E && !anyregs; ++I)
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ if (!MRI.reg_nodbg_empty(*AI)) {
+ anyregs = true;
+ break;
+ }
if (!anyregs) return false;
// Initialize the AliasMap on the first use.
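The loop above replaces the removed MachineRegisterInfo::isPhysRegUsed() query: a register now counts as used if any register aliasing it has a non-debug def or use. The same idiom as a free-standing helper (a sketch; the helper name is illustrative):

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

static bool isRegOrAliasUsed(unsigned PhysReg,
                             const llvm::MachineRegisterInfo &MRI,
                             const llvm::TargetRegisterInfo *TRI) {
  // IncludeSelf = true so PhysReg itself is visited as well as its aliases.
  for (llvm::MCRegAliasIterator AI(PhysReg, TRI, /*IncludeSelf=*/true);
       AI.isValid(); ++AI)
    if (!MRI.reg_nodbg_empty(*AI))
      return true;
  return false;
}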
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 37b3bf17ed1f..6f9e8394081e 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -117,7 +117,6 @@ STATISTIC(NumMerged, "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
const TargetMachine *TM;
- const DataLayout *DL;
// FIXME: Infer the maximum possible offset depending on the actual users
// (these max offsets are different for the users inside Thumb or ARM
// functions), see the code that passes in the offset in the ARM backend
@@ -160,8 +159,8 @@ namespace {
explicit GlobalMerge(const TargetMachine *TM = nullptr,
unsigned MaximalOffset = 0,
bool OnlyOptimizeForSize = false)
- : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()),
- MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize) {
+ : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
+ OnlyOptimizeForSize(OnlyOptimizeForSize) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -188,14 +187,16 @@ INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables",
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
+ auto &DL = M.getDataLayout();
// FIXME: Find better heuristics
- std::stable_sort(Globals.begin(), Globals.end(),
- [this](const GlobalVariable *GV1, const GlobalVariable *GV2) {
- Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+ std::stable_sort(
+ Globals.begin(), Globals.end(),
+ [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
- return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2));
- });
+ return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2));
+ });
// If we want to just blindly group all globals together, do so.
if (!GlobalMergeGroupByUse) {
@@ -410,6 +411,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
unsigned AddrSpace) const {
Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ auto &DL = M.getDataLayout();
assert(Globals.size() > 1);
@@ -427,7 +429,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
GlobalVariable *TheFirstExternal = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getType()->getElementType();
- MergedSize += DL->getTypeAllocSize(Ty);
+ MergedSize += DL.getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
break;
}
@@ -526,6 +528,7 @@ bool GlobalMerge::doInitialization(Module &M) {
if (!EnableGlobalMerge)
return false;
+ auto &DL = M.getDataLayout();
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
bool Changed = false;
@@ -548,9 +551,9 @@ bool GlobalMerge::doInitialization(Module &M) {
unsigned AddressSpace = PT->getAddressSpace();
// Ignore fancy-aligned globals for now.
- unsigned Alignment = DL->getPreferredAlignment(I);
+ unsigned Alignment = DL.getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
- if (Alignment > DL->getABITypeAlignment(Ty))
+ if (Alignment > DL.getABITypeAlignment(Ty))
continue;
// Ignore all 'special' globals.
@@ -562,7 +565,7 @@ bool GlobalMerge::doInitialization(Module &M) {
if (isMustKeepGlobalVariable(I))
continue;
- if (DL->getTypeAllocSize(Ty) < MaxOffset) {
+ if (DL.getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal())
BSSGlobals[AddressSpace].push_back(I);
else if (I->isConstant())
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index a02cd67ac649..93e04876a8f3 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -25,9 +25,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -47,6 +50,11 @@ static cl::opt<unsigned> PageSize("imp-null-check-page-size",
"bytes"),
cl::init(4096));
+#define DEBUG_TYPE "implicit-null-checks"
+
+STATISTIC(NumImplicitNullChecks,
+ "Number of explicit null checks made implicit");
+
namespace {
class ImplicitNullChecks : public MachineFunctionPass {
@@ -171,6 +179,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// callq throw_NullPointerException
//
// LblNotNull:
+ // Inst0
+ // Inst1
+ // ...
// Def = Load (%RAX + <offset>)
// ...
//
@@ -181,6 +192,8 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// jmp LblNotNull ;; explicit or fallthrough
//
// LblNotNull:
+ // Inst0
+ // Inst1
// ...
//
// LblNull:
@@ -188,15 +201,75 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
//
unsigned PointerReg = MBP.LHS.getReg();
- MachineInstr *MemOp = &*NotNullSucc->begin();
- unsigned BaseReg, Offset;
- if (TII->getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI))
- if (MemOp->mayLoad() && !MemOp->isPredicable() && BaseReg == PointerReg &&
- Offset < PageSize && MemOp->getDesc().getNumDefs() == 1) {
- NullCheckList.emplace_back(MemOp, MBP.ConditionDef, &MBB, NotNullSucc,
- NullSucc);
- return true;
+
+ // As we scan NotNullSucc for a suitable load instruction, we keep track of
+ // the registers defined and used by the instructions we scan past. This bit
+ // of information lets us decide if it is legal to hoist the load instruction
+ // we find (if we do find such an instruction) to before NotNullSucc.
+ DenseSet<unsigned> RegDefs, RegUses;
+
+ // Returns true if it is safe to reorder MI to before NotNullSucc.
+ auto IsSafeToHoist = [&](MachineInstr *MI) {
+ // Right now we don't want to worry about LLVM's memory model. This can be
+ // made more precise later.
+ for (auto *MMO : MI->memoperands())
+ if (!MMO->isUnordered())
+ return false;
+
+ for (auto &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg()) {
+ for (unsigned Reg : RegDefs)
+ if (TRI->regsOverlap(Reg, MO.getReg()))
+ return false; // We found a write-after-write or read-after-write
+
+ if (MO.isDef())
+ for (unsigned Reg : RegUses)
+ if (TRI->regsOverlap(Reg, MO.getReg()))
+ return false; // We found a write-after-read
+ }
+ }
+
+ return true;
+ };
+
+ for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
+ ++MII) {
+ MachineInstr *MI = &*MII;
+ unsigned BaseReg, Offset;
+ if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg &&
+ Offset < PageSize && MI->getDesc().getNumDefs() == 1 &&
+ IsSafeToHoist(MI)) {
+ NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc);
+ return true;
+ }
+
+ // MI did not match our criteria for conversion to a trapping load. Check
+ // if we can continue looking.
+
+ if (MI->mayStore() || MI->hasUnmodeledSideEffects())
+ return false;
+
+ for (auto *MMO : MI->memoperands())
+ // Right now we don't want to worry about LLVM's memory model.
+ if (!MMO->isUnordered())
+ return false;
+
+ // It _may_ be okay to reorder a later load instruction across MI. Make a
+ // note of its operands so that we can make the legality check if we find a
+ // suitable load instruction:
+
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ if (MO.isDef())
+ RegDefs.insert(MO.getReg());
+ else
+ RegUses.insert(MO.getReg());
}
+ }
return false;
}
@@ -247,7 +320,7 @@ void ImplicitNullChecks::rewriteNullChecks(
// touch the successors list for any basic block since we haven't changed
// control flow, we've just made it implicit.
insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel);
- NC.MemOperation->removeFromParent();
+ NC.MemOperation->eraseFromParent();
NC.CheckOperation->eraseFromParent();
// Insert an *unconditional* branch to not-null successor.
@@ -257,6 +330,8 @@ void ImplicitNullChecks::rewriteNullChecks(
// Emit the HandlerLabel as an EH_LABEL.
BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL,
TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel);
+
+ NumImplicitNullChecks++;
}
}
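The scan added above keeps two register sets while walking NotNullSucc and rejects any candidate load whose operands conflict with them. The dependence rule itself, restated as a small self-contained sketch (plain std::set and register-number equality stand in for TargetRegisterInfo::regsOverlap()):

#include <set>
#include <vector>

struct OperandInfo {
  unsigned Reg;
  bool IsDef;
};

// RegDefs/RegUses hold the registers defined/read by the instructions that
// would be skipped over. A candidate may be hoisted above them only if it
// neither reads nor writes a defined register (RAW/WAW hazards) and does
// not write a register that was read (WAR hazard).
static bool canHoistOver(const std::set<unsigned> &RegDefs,
                         const std::set<unsigned> &RegUses,
                         const std::vector<OperandInfo> &CandidateOps) {
  for (const OperandInfo &Op : CandidateOps) {
    if (RegDefs.count(Op.Reg))
      return false; // read-after-write or write-after-write
    if (Op.IsDef && RegUses.count(Op.Reg))
      return false; // write-after-read
  }
  return true;
}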
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index b486bdc91453..37299eb664cf 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -90,8 +90,8 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *
addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
- bool DisableVerify, AnalysisID StartAfter,
- AnalysisID StopAfter,
+ bool DisableVerify, AnalysisID StartBefore,
+ AnalysisID StartAfter, AnalysisID StopAfter,
MachineFunctionInitializer *MFInitializer = nullptr) {
// Add internal analysis passes from the target machine.
@@ -100,7 +100,7 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
- PassConfig->setStartStopPasses(StartAfter, StopAfter);
+ PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
@@ -143,11 +143,12 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
bool LLVMTargetMachine::addPassesToEmitFile(
PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
- bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter,
- MachineFunctionInitializer *MFInitializer) {
+ bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
+ AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) {
// Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(
- this, PM, DisableVerify, StartAfter, StopAfter, MFInitializer);
+ MCContext *Context =
+ addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
+ StopAfter, MFInitializer);
if (!Context)
return true;
@@ -231,7 +232,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr);
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr,
+ nullptr);
if (!Ctx)
return true;
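With the extra StartBefore parameter, callers of addPassesToEmitFile() that spell out the pipeline anchors now pass one more AnalysisID. A hedged usage sketch (the setup of the target machine, pass manager and output stream is assumed to be done by the caller; passing nullptr for all three anchors keeps the full pipeline):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

static void emitObjectFile(llvm::TargetMachine &TM,
                           llvm::legacy::PassManager &PM,
                           llvm::raw_pwrite_stream &OS) {
  if (TM.addPassesToEmitFile(PM, OS, llvm::TargetMachine::CGFT_ObjectFile,
                             /*DisableVerify=*/true,
                             /*StartBefore=*/nullptr,
                             /*StartAfter=*/nullptr,
                             /*StopAfter=*/nullptr))
    llvm::report_fatal_error("target does not support object file emission");
}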
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 154ce6fc122b..000151acd735 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -15,12 +15,12 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -49,7 +49,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
- MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
@@ -101,7 +100,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
<< " to " << PrintReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
@@ -131,6 +129,14 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
DEBUG(dbgs() << '\n');
}
+bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const {
+ for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
+ if (!Matrix[*Unit].empty())
+ return true;
+ }
+ return false;
+}
+
bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
unsigned PhysReg) {
// Check if the cached information is valid.
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index e9b3916a11fa..482c33ae2235 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MILexer.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
@@ -64,6 +65,17 @@ static bool isIdentifierChar(char C) {
return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.';
}
+static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
+ return StringSwitch<MIToken::TokenKind>(Identifier)
+ .Case("_", MIToken::underscore)
+ .Case("implicit", MIToken::kw_implicit)
+ .Case("implicit-def", MIToken::kw_implicit_define)
+ .Case("dead", MIToken::kw_dead)
+ .Case("killed", MIToken::kw_killed)
+ .Case("undef", MIToken::kw_undef)
+ .Default(MIToken::Identifier);
+}
+
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
if (!isalpha(C.peek()) && C.peek() != '_')
return None;
@@ -71,8 +83,7 @@ static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
while (isIdentifierChar(C.peek()))
C.advance();
auto Identifier = Range.upto(C);
- Token = MIToken(Identifier == "_" ? MIToken::underscore : MIToken::Identifier,
- Identifier);
+ Token = MIToken(getIdentifierKind(Identifier), Identifier);
return C;
}
@@ -104,9 +115,22 @@ static Cursor maybeLexMachineBasicBlock(
return C;
}
+static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
+ auto Range = C;
+ C.advance(); // Skip '%'
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token = MIToken(MIToken::VirtualRegister, Range.upto(C),
+ APSInt(NumberRange.upto(C)));
+ return C;
+}
+
static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
if (C.peek() != '%')
return None;
+ if (isdigit(C.peek(1)))
+ return lexVirtualRegister(C, Token);
auto Range = C;
C.advance(); // Skip '%'
while (isIdentifierChar(C.peek()))
@@ -155,6 +179,8 @@ static MIToken::TokenKind symbolToken(char C) {
return MIToken::comma;
case '=':
return MIToken::equal;
+ case ':':
+ return MIToken::colon;
default:
return MIToken::Error;
}
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index c28935f38909..55460b56e7d6 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -35,6 +35,14 @@ struct MIToken {
comma,
equal,
underscore,
+ colon,
+
+ // Keywords
+ kw_implicit,
+ kw_implicit_define,
+ kw_dead,
+ kw_killed,
+ kw_undef,
// Identifier tokens
Identifier,
@@ -44,7 +52,8 @@ struct MIToken {
GlobalValue,
// Other tokens
- IntegerLiteral
+ IntegerLiteral,
+ VirtualRegister
};
private:
@@ -66,7 +75,13 @@ public:
bool isError() const { return Kind == Error; }
bool isRegister() const {
- return Kind == NamedRegister || Kind == underscore;
+ return Kind == NamedRegister || Kind == underscore ||
+ Kind == VirtualRegister;
+ }
+
+ bool isRegisterFlag() const {
+ return Kind == kw_implicit || Kind == kw_implicit_define ||
+ Kind == kw_dead || Kind == kw_killed || Kind == kw_undef;
}
bool is(TokenKind K) const { return Kind == K; }
@@ -81,7 +96,7 @@ public:
bool hasIntegerValue() const {
return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
- Kind == GlobalValue;
+ Kind == GlobalValue || Kind == VirtualRegister;
}
};
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index b618e53b8e43..c00011288a60 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
@@ -28,14 +29,25 @@ using namespace llvm;
namespace {
+/// A wrapper struct around the 'MachineOperand' struct that includes a source
+/// range.
+struct MachineOperandWithLocation {
+ MachineOperand Operand;
+ StringRef::iterator Begin;
+ StringRef::iterator End;
+
+ MachineOperandWithLocation(const MachineOperand &Operand,
+ StringRef::iterator Begin, StringRef::iterator End)
+ : Operand(Operand), Begin(Begin), End(End) {}
+};
+
class MIParser {
SourceMgr &SM;
MachineFunction &MF;
SMDiagnostic &Error;
StringRef Source, CurrentSource;
MIToken Token;
- /// Maps from basic block numbers to MBBs.
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots;
+ const PerFunctionMIParsingState &PFS;
/// Maps from indices to unnamed global values and metadata nodes.
const SlotMapping &IRSlots;
/// Maps from instruction names to op codes.
@@ -44,11 +56,12 @@ class MIParser {
StringMap<unsigned> Names2Regs;
/// Maps from register mask names to register masks.
StringMap<const uint32_t *> Names2RegMasks;
+ /// Maps from subregister names to subregister indices.
+ StringMap<unsigned> Names2SubRegIndices;
public:
MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
- StringRef Source,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+ StringRef Source, const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots);
void lex();
@@ -65,8 +78,11 @@ public:
bool parse(MachineInstr *&MI);
bool parseMBB(MachineBasicBlock *&MBB);
+ bool parseNamedRegister(unsigned &Reg);
bool parseRegister(unsigned &Reg);
+ bool parseRegisterFlag(unsigned &Flags);
+ bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
bool parseMBBReference(MachineBasicBlock *&MBB);
@@ -88,6 +104,9 @@ private:
bool parseInstruction(unsigned &OpCode);
+ bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands,
+ const MCInstrDesc &MCID);
+
void initNames2Regs();
/// Try to convert a register name to a register number. Return true if the
@@ -100,17 +119,22 @@ private:
///
/// Return null if the identifier isn't a register mask.
const uint32_t *getRegMask(StringRef Identifier);
+
+ void initNames2SubRegIndices();
+
+ /// Check if the given identifier is a name of a subregister index.
+ ///
+ /// Return 0 if the name isn't a subregister index.
+ unsigned getSubRegIndex(StringRef Name);
};
} // end anonymous namespace
MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
- StringRef Source,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+ StringRef Source, const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots)
: SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
- Token(MIToken::Error, StringRef()), MBBSlots(MBBSlots), IRSlots(IRSlots) {
-}
+ Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {}
void MIParser::lex() {
CurrentSource = lexMIToken(
@@ -121,8 +145,6 @@ void MIParser::lex() {
bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
- // TODO: Get the proper location in the MIR file, not just a location inside
- // the string.
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
Error = SMDiagnostic(
SM, SMLoc(),
@@ -137,11 +159,12 @@ bool MIParser::parse(MachineInstr *&MI) {
// Parse any register operands before '='
// TODO: Allow parsing of multiple operands before '='
MachineOperand MO = MachineOperand::CreateImm(0);
- SmallVector<MachineOperand, 8> Operands;
- if (Token.isRegister()) {
+ SmallVector<MachineOperandWithLocation, 8> Operands;
+ if (Token.isRegister() || Token.isRegisterFlag()) {
+ auto Loc = Token.location();
if (parseRegisterOperand(MO, /*IsDef=*/true))
return true;
- Operands.push_back(MO);
+ Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
if (Token.isNot(MIToken::equal))
return error("expected '='");
lex();
@@ -155,9 +178,10 @@ bool MIParser::parse(MachineInstr *&MI) {
// Parse the remaining machine operands.
while (Token.isNot(MIToken::Eof)) {
+ auto Loc = Token.location();
if (parseMachineOperand(MO))
return true;
- Operands.push_back(MO);
+ Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
if (Token.is(MIToken::Eof))
break;
if (Token.isNot(MIToken::comma))
@@ -166,25 +190,16 @@ bool MIParser::parse(MachineInstr *&MI) {
}
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
-
- // Verify machine operands.
if (!MCID.isVariadic()) {
- for (size_t I = 0, E = Operands.size(); I < E; ++I) {
- if (I < MCID.getNumOperands())
- continue;
- // Mark this register as implicit to prevent an assertion when it's added
- // to an instruction. This is a temporary workaround until the implicit
- // register flag can be parsed.
- if (Operands[I].isReg())
- Operands[I].setImplicit();
- }
+ // FIXME: Move the implicit operand verification to the machine verifier.
+ if (verifyImplicitOperands(Operands, MCID))
+ return true;
}
- // TODO: Determine the implicit behaviour when implicit register flags are
- // parsed.
+ // TODO: Check for extraneous machine operands.
MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
for (const auto &Operand : Operands)
- MI->addOperand(MF, Operand);
+ MI->addOperand(MF, Operand.Operand);
return false;
}
@@ -201,6 +216,80 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
return false;
}
+bool MIParser::parseNamedRegister(unsigned &Reg) {
+ lex();
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ if (parseRegister(Reg))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
+ assert(MO.isImplicit());
+ return MO.isDef() ? "implicit-def" : "implicit";
+}
+
+static std::string getRegisterName(const TargetRegisterInfo *TRI,
+ unsigned Reg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg");
+ return StringRef(TRI->getName(Reg)).lower();
+}
+
+bool MIParser::verifyImplicitOperands(
+ ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) {
+ if (MCID.isCall())
+ // We can't verify call instructions as they can contain arbitrary implicit
+ // register and register mask operands.
+ return false;
+
+ // Gather all the expected implicit operands.
+ SmallVector<MachineOperand, 4> ImplicitOperands;
+ if (MCID.ImplicitDefs)
+ for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ ImplicitOperands.push_back(
+ MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (MCID.ImplicitUses)
+ for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
+ ImplicitOperands.push_back(
+ MachineOperand::CreateReg(*ImpUses, false, true));
+
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ size_t I = ImplicitOperands.size(), J = Operands.size();
+ while (I) {
+ --I;
+ if (J) {
+ --J;
+ const auto &ImplicitOperand = ImplicitOperands[I];
+ const auto &Operand = Operands[J].Operand;
+ if (ImplicitOperand.isIdenticalTo(Operand))
+ continue;
+ if (Operand.isReg() && Operand.isImplicit()) {
+ return error(Operands[J].Begin,
+ Twine("expected an implicit register operand '") +
+ printImplicitRegisterFlag(ImplicitOperand) + " %" +
+ getRegisterName(TRI, ImplicitOperand.getReg()) + "'");
+ }
+ }
+ // TODO: Fix source location when Operands[J].end is right before '=', i.e:
+ // instead of reporting an error at this location:
+ // %eax = MOV32r0
+ // ^
+ // report the error at the following location:
+ // %eax = MOV32r0
+ // ^
+ return error(J < Operands.size() ? Operands[J].End : Token.location(),
+ Twine("missing implicit register operand '") +
+ printImplicitRegisterFlag(ImplicitOperands[I]) + " %" +
+ getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'");
+ }
+ return false;
+}
+
bool MIParser::parseInstruction(unsigned &OpCode) {
if (Token.isNot(MIToken::Identifier))
return error("expected a machine instruction");
@@ -222,6 +311,17 @@ bool MIParser::parseRegister(unsigned &Reg) {
return error(Twine("unknown register name '") + Name + "'");
break;
}
+ case MIToken::VirtualRegister: {
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ const auto RegInfo = PFS.VirtualRegisterSlots.find(ID);
+ if (RegInfo == PFS.VirtualRegisterSlots.end())
+ return error(Twine("use of undefined virtual register '%") + Twine(ID) +
+ "'");
+ Reg = RegInfo->second;
+ break;
+ }
// TODO: Parse other register kinds.
default:
llvm_unreachable("The current token should be a register");
@@ -229,14 +329,66 @@ bool MIParser::parseRegister(unsigned &Reg) {
return false;
}
+bool MIParser::parseRegisterFlag(unsigned &Flags) {
+ switch (Token.kind()) {
+ case MIToken::kw_implicit:
+ Flags |= RegState::Implicit;
+ break;
+ case MIToken::kw_implicit_define:
+ Flags |= RegState::ImplicitDefine;
+ break;
+ case MIToken::kw_dead:
+ Flags |= RegState::Dead;
+ break;
+ case MIToken::kw_killed:
+ Flags |= RegState::Kill;
+ break;
+ case MIToken::kw_undef:
+ Flags |= RegState::Undef;
+ break;
+ // TODO: report an error when we specify the same flag more than once.
+ // TODO: parse the other register flags.
+ default:
+ llvm_unreachable("The current token should be a register flag");
+ }
+ lex();
+ return false;
+}
+
+bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
+ assert(Token.is(MIToken::colon));
+ lex();
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected a subregister index after ':'");
+ auto Name = Token.stringValue();
+ SubReg = getSubRegIndex(Name);
+ if (!SubReg)
+ return error(Twine("use of unknown subregister index '") + Name + "'");
+ lex();
+ return false;
+}
+
bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
unsigned Reg;
- // TODO: Parse register flags.
+ unsigned Flags = IsDef ? RegState::Define : 0;
+ while (Token.isRegisterFlag()) {
+ if (parseRegisterFlag(Flags))
+ return true;
+ }
+ if (!Token.isRegister())
+ return error("expected a register after register flags");
if (parseRegister(Reg))
return true;
lex();
- // TODO: Parse subregister.
- Dest = MachineOperand::CreateReg(Reg, IsDef);
+ unsigned SubReg = 0;
+ if (Token.is(MIToken::colon)) {
+ if (parseSubRegisterIndex(SubReg))
+ return true;
+ }
+ Dest = MachineOperand::CreateReg(
+ Reg, Flags & RegState::Define, Flags & RegState::Implicit,
+ Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
+ /*isEarlyClobber=*/false, SubReg);
return false;
}
@@ -266,8 +418,8 @@ bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
unsigned Number;
if (getUnsigned(Number))
return true;
- auto MBBInfo = MBBSlots.find(Number);
- if (MBBInfo == MBBSlots.end())
+ auto MBBInfo = PFS.MBBSlots.find(Number);
+ if (MBBInfo == PFS.MBBSlots.end())
return error(Twine("use of undefined machine basic block #") +
Twine(Number));
MBB = MBBInfo->second;
@@ -318,8 +470,14 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
bool MIParser::parseMachineOperand(MachineOperand &Dest) {
switch (Token.kind()) {
+ case MIToken::kw_implicit:
+ case MIToken::kw_implicit_define:
+ case MIToken::kw_dead:
+ case MIToken::kw_killed:
+ case MIToken::kw_undef:
case MIToken::underscore:
case MIToken::NamedRegister:
+ case MIToken::VirtualRegister:
return parseRegisterOperand(Dest);
case MIToken::IntegerLiteral:
return parseImmediateOperand(Dest);
@@ -408,16 +566,41 @@ const uint32_t *MIParser::getRegMask(StringRef Identifier) {
return RegMaskInfo->getValue();
}
-bool llvm::parseMachineInstr(
- MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, StringRef Src,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
- const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parse(MI);
+void MIParser::initNames2SubRegIndices() {
+ if (!Names2SubRegIndices.empty())
+ return;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
+ Names2SubRegIndices.insert(
+ std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I));
+}
+
+unsigned MIParser::getSubRegIndex(StringRef Name) {
+ initNames2SubRegIndices();
+ auto SubRegInfo = Names2SubRegIndices.find(Name);
+ if (SubRegInfo == Names2SubRegIndices.end())
+ return 0;
+ return SubRegInfo->getValue();
+}
+
+bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI);
+}
+
+bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots, SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB);
}
-bool llvm::parseMBBReference(
- MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
- const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parseMBB(MBB);
+bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error) {
+ return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg);
}
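The register flags the parser decodes above ('implicit', 'implicit-def', 'dead', 'killed', 'undef') are the same RegState bits used when machine instructions are built programmatically. A small hedged sketch using the generic COPY opcode (the helper name and its surrounding context are illustrative, not part of this patch):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"

static llvm::MachineInstr *
buildExampleCopy(llvm::MachineBasicBlock &MBB,
                 llvm::MachineBasicBlock::iterator InsertPt,
                 const llvm::TargetInstrInfo *TII, llvm::DebugLoc DL,
                 unsigned DstReg, unsigned SrcReg) {
  // Flags combine with '|'; here the source register carries the 'killed'
  // flag, matching what the parser would produce for "killed %<reg>".
  return llvm::BuildMI(MBB, InsertPt, DL, TII->get(llvm::TargetOpcode::COPY),
                       DstReg)
      .addReg(SrcReg, llvm::RegState::Kill);
}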
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
index 4d6d4e700217..fca4c4e6f885 100644
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ b/lib/CodeGen/MIRParser/MIParser.h
@@ -26,16 +26,26 @@ struct SlotMapping;
class SMDiagnostic;
class SourceMgr;
+struct PerFunctionMIParsingState {
+ DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
+ DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+};
+
bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF,
- StringRef Src,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+ StringRef Src, const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots, SMDiagnostic &Error);
bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
MachineFunction &MF, StringRef Src,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+ const PerFunctionMIParsingState &PFS,
const SlotMapping &IRSlots, SMDiagnostic &Error);
+bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
+ MachineFunction &MF, StringRef Src,
+ const PerFunctionMIParsingState &PFS,
+ const SlotMapping &IRSlots,
+ SMDiagnostic &Error);
+
} // end namespace llvm
#endif
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 397458300782..16b0e1655891 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -21,6 +21,7 @@
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
@@ -48,6 +49,8 @@ class MIRParserImpl {
LLVMContext &Context;
StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
SlotMapping IRSlots;
+ /// Maps from register class names to register classes.
+ StringMap<const TargetRegisterClass *> Names2RegClasses;
public:
MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
@@ -60,6 +63,11 @@ public:
/// Always returns true.
bool error(const Twine &Message);
+ /// Report an error with the given message at the given location.
+ ///
+ /// Always returns true.
+ bool error(SMLoc Loc, const Twine &Message);
+
/// Report a given error with the location translated from the location in an
/// embedded string literal to a location in the MIR file.
///
@@ -90,13 +98,18 @@ public:
/// Initialize the machine basic block using its YAML representation.
///
/// Return true if an error occurred.
- bool initializeMachineBasicBlock(
- MachineFunction &MF, MachineBasicBlock &MBB,
- const yaml::MachineBasicBlock &YamlMBB,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+ const yaml::MachineBasicBlock &YamlMBB,
+ const PerFunctionMIParsingState &PFS);
+
+ bool
+ initializeRegisterInfo(const MachineFunction &MF,
+ MachineRegisterInfo &RegInfo,
+ const yaml::MachineFunction &YamlMF,
+ DenseMap<unsigned, unsigned> &VirtualRegisterSlots);
- bool initializeRegisterInfo(MachineRegisterInfo &RegInfo,
- const yaml::MachineFunction &YamlMF);
+ bool initializeFrameInfo(MachineFrameInfo &MFI,
+ const yaml::MachineFunction &YamlMF);
private:
/// Return a MIR diagnostic converted from an MI string diagnostic.
@@ -109,6 +122,14 @@ private:
/// Create an empty function with the given name.
void createDummyFunction(StringRef Name, Module &M);
+
+ void initNames2RegClasses(const MachineFunction &MF);
+
+ /// Check if the given identifier is a name of a register class.
+ ///
+ /// Return null if the name isn't a register class.
+ const TargetRegisterClass *getRegClass(const MachineFunction &MF,
+ StringRef Name);
};
} // end namespace llvm
@@ -125,6 +146,12 @@ bool MIRParserImpl::error(const Twine &Message) {
return true;
}
+bool MIRParserImpl::error(SMLoc Loc, const Twine &Message) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error, SM.GetMessage(Loc, SourceMgr::DK_Error, Message)));
+ return true;
+}
+
bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) {
assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error");
reportDiagnostic(diagFromMIStringDiag(Error, SourceRange));
@@ -233,34 +260,44 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasInlineAsm(YamlMF.HasInlineAsm);
- if (initializeRegisterInfo(MF.getRegInfo(), YamlMF))
+ PerFunctionMIParsingState PFS;
+ if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF,
+ PFS.VirtualRegisterSlots))
+ return true;
+ if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF))
return true;
const auto &F = *MF.getFunction();
- DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
for (const auto &YamlMBB : YamlMF.BasicBlocks) {
const BasicBlock *BB = nullptr;
- if (!YamlMBB.Name.empty()) {
+ const yaml::StringValue &Name = YamlMBB.Name;
+ if (!Name.Value.empty()) {
BB = dyn_cast_or_null<BasicBlock>(
- F.getValueSymbolTable().lookup(YamlMBB.Name));
+ F.getValueSymbolTable().lookup(Name.Value));
if (!BB)
- return error(Twine("basic block '") + YamlMBB.Name +
- "' is not defined in the function '" + MF.getName() + "'");
+ return error(Name.SourceRange.Start,
+ Twine("basic block '") + Name.Value +
+ "' is not defined in the function '" + MF.getName() +
+ "'");
}
auto *MBB = MF.CreateMachineBasicBlock(BB);
MF.insert(MF.end(), MBB);
- bool WasInserted = MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
+ bool WasInserted =
+ PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
if (!WasInserted)
return error(Twine("redefinition of machine basic block with id #") +
Twine(YamlMBB.ID));
}
+ if (YamlMF.BasicBlocks.empty())
+ return error(Twine("machine function '") + Twine(MF.getName()) +
+ "' requires at least one machine basic block in its body");
// Initialize the machine basic blocks after creating them all so that the
// machine instructions parser can resolve the MBB references.
unsigned I = 0;
for (const auto &YamlMBB : YamlMF.BasicBlocks) {
if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
- MBBSlots))
+ PFS))
return true;
}
return false;
@@ -269,7 +306,7 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
bool MIRParserImpl::initializeMachineBasicBlock(
MachineFunction &MF, MachineBasicBlock &MBB,
const yaml::MachineBasicBlock &YamlMBB,
- const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ const PerFunctionMIParsingState &PFS) {
MBB.setAlignment(YamlMBB.Alignment);
if (YamlMBB.AddressTaken)
MBB.setHasAddressTaken();
@@ -278,16 +315,24 @@ bool MIRParserImpl::initializeMachineBasicBlock(
// Parse the successors.
for (const auto &MBBSource : YamlMBB.Successors) {
MachineBasicBlock *SuccMBB = nullptr;
- if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, MBBSlots, IRSlots,
+ if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots,
Error))
return error(Error, MBBSource.SourceRange);
// TODO: Report an error when adding the same successor more than once.
MBB.addSuccessor(SuccMBB);
}
+ // Parse the liveins.
+ for (const auto &LiveInSource : YamlMBB.LiveIns) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS,
+ IRSlots, Error))
+ return error(Error, LiveInSource.SourceRange);
+ MBB.addLiveIn(Reg);
+ }
// Parse the instructions.
for (const auto &MISource : YamlMBB.Instructions) {
MachineInstr *MI = nullptr;
- if (parseMachineInstr(MI, SM, MF, MISource.Value, MBBSlots, IRSlots, Error))
+ if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error))
return error(Error, MISource.SourceRange);
MBB.insert(MBB.end(), MI);
}
@@ -295,7 +340,9 @@ bool MIRParserImpl::initializeMachineBasicBlock(
}
bool MIRParserImpl::initializeRegisterInfo(
- MachineRegisterInfo &RegInfo, const yaml::MachineFunction &YamlMF) {
+ const MachineFunction &MF, MachineRegisterInfo &RegInfo,
+ const yaml::MachineFunction &YamlMF,
+ DenseMap<unsigned, unsigned> &VirtualRegisterSlots) {
assert(RegInfo.isSSA());
if (!YamlMF.IsSSA)
RegInfo.leaveSSA();
@@ -303,6 +350,67 @@ bool MIRParserImpl::initializeRegisterInfo(
if (!YamlMF.TracksRegLiveness)
RegInfo.invalidateLiveness();
RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness);
+
+ // Parse the virtual register information.
+ for (const auto &VReg : YamlMF.VirtualRegisters) {
+ const auto *RC = getRegClass(MF, VReg.Class.Value);
+ if (!RC)
+ return error(VReg.Class.SourceRange.Start,
+ Twine("use of undefined register class '") +
+ VReg.Class.Value + "'");
+ unsigned Reg = RegInfo.createVirtualRegister(RC);
+ // TODO: Report an error when the same virtual register with the same ID is
+ // redefined.
+ VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg));
+ }
+ return false;
+}
+
+bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI,
+ const yaml::MachineFunction &YamlMF) {
+ const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
+ MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
+ MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
+ MFI.setHasStackMap(YamlMFI.HasStackMap);
+ MFI.setHasPatchPoint(YamlMFI.HasPatchPoint);
+ MFI.setStackSize(YamlMFI.StackSize);
+ MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment);
+ if (YamlMFI.MaxAlignment)
+ MFI.ensureMaxAlignment(YamlMFI.MaxAlignment);
+ MFI.setAdjustsStack(YamlMFI.AdjustsStack);
+ MFI.setHasCalls(YamlMFI.HasCalls);
+ MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+ MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
+ MFI.setHasVAStart(YamlMFI.HasVAStart);
+ MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
+
+ // Initialize the fixed frame objects.
+ for (const auto &Object : YamlMF.FixedStackObjects) {
+ int ObjectIdx;
+ if (Object.Type != yaml::FixedMachineStackObject::SpillSlot)
+ ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset,
+ Object.IsImmutable, Object.IsAliased);
+ else
+ ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
+ MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+ // TODO: Store the mapping between fixed object IDs and object indices to
+ // parse fixed stack object references correctly.
+ }
+
+ // Initialize the ordinary frame objects.
+ for (const auto &Object : YamlMF.StackObjects) {
+ int ObjectIdx;
+ if (Object.Type == yaml::MachineStackObject::VariableSized)
+ ObjectIdx =
+ MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr);
+ else
+ ObjectIdx = MFI.CreateStackObject(
+ Object.Size, Object.Alignment,
+ Object.Type == yaml::MachineStackObject::SpillSlot);
+ MFI.setObjectOffset(ObjectIdx, Object.Offset);
+ // TODO: Store the mapping between object IDs and object indices to parse
+ // stack object references correctly.
+ }
return false;
}
@@ -353,6 +461,26 @@ SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
Error.getFixIts());
}
+void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) {
+ if (!Names2RegClasses.empty())
+ return;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
+ const auto *RC = TRI->getRegClass(I);
+ Names2RegClasses.insert(
+ std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
+ }
+}
+
+const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
+ StringRef Name) {
+ initNames2RegClasses(MF);
+ auto RegClassInfo = Names2RegClasses.find(Name);
+ if (RegClassInfo == Names2RegClasses.end())
+ return nullptr;
+ return RegClassInfo->getValue();
+}
+
MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
: Impl(std::move(Impl)) {}
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index 76cbe2994c95..d5cf9244199e 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -15,10 +15,12 @@
#include "MIRPrinter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/YAMLTraits.h"
@@ -40,9 +42,13 @@ public:
void print(const MachineFunction &MF);
- void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo);
- void convert(const Module &M, yaml::MachineBasicBlock &YamlMBB,
+ void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI);
+ void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI);
+ void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB,
const MachineBasicBlock &MBB);
+ void convertStackObjects(yaml::MachineFunction &MF,
+ const MachineFrameInfo &MFI);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -51,14 +57,14 @@ private:
/// This class prints out the machine instructions using the MIR serialization
/// format.
class MIPrinter {
- const Module &M;
raw_ostream &OS;
+ ModuleSlotTracker &MST;
const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
public:
- MIPrinter(const Module &M, raw_ostream &OS,
+ MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds)
- : M(M), OS(OS), RegisterMaskIds(RegisterMaskIds) {}
+ : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {}
void print(const MachineInstr &MI);
void printMBBReference(const MachineBasicBlock &MBB);
@@ -84,6 +90,19 @@ template <> struct BlockScalarTraits<Module> {
} // end namespace yaml
} // end namespace llvm
+static void printReg(unsigned Reg, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ // TODO: Print Stack Slots.
+ if (!Reg)
+ OS << '_';
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (Reg < TRI->getNumRegs())
+ OS << '%' << StringRef(TRI->getName(Reg)).lower();
+ else
+ llvm_unreachable("Can't print this kind of register yet");
+}
+
void MIRPrinter::print(const MachineFunction &MF) {
initRegisterMaskIds(MF);
@@ -92,10 +111,12 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasInlineAsm = MF.hasInlineAsm();
- convert(YamlMF, MF.getRegInfo());
+ convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
+ convert(YamlMF.FrameInfo, *MF.getFrameInfo());
+ convertStackObjects(YamlMF, *MF.getFrameInfo());
int I = 0;
- const auto &M = *MF.getFunction()->getParent();
+ ModuleSlotTracker MST(MF.getFunction()->getParent());
for (const auto &MBB : MF) {
// TODO: Allow printing of non sequentially numbered MBBs.
// This is currently needed as the basic block references get their index
@@ -105,7 +126,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
"Can't print MBBs that aren't sequentially numbered");
(void)I;
yaml::MachineBasicBlock YamlMBB;
- convert(M, YamlMBB, MBB);
+ convert(MST, YamlMBB, MBB);
YamlMF.BasicBlocks.push_back(YamlMBB);
}
yaml::Output Out(OS);
@@ -113,37 +134,120 @@ void MIRPrinter::print(const MachineFunction &MF) {
}
void MIRPrinter::convert(yaml::MachineFunction &MF,
- const MachineRegisterInfo &RegInfo) {
+ const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
MF.IsSSA = RegInfo.isSSA();
MF.TracksRegLiveness = RegInfo.tracksLiveness();
MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled();
+
+ // Print the virtual register definitions.
+ for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ yaml::VirtualRegisterDefinition VReg;
+ VReg.ID = I;
+ VReg.Class =
+ StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ MF.VirtualRegisters.push_back(VReg);
+ }
+}
+
+void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
+ const MachineFrameInfo &MFI) {
+ YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken();
+ YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken();
+ YamlMFI.HasStackMap = MFI.hasStackMap();
+ YamlMFI.HasPatchPoint = MFI.hasPatchPoint();
+ YamlMFI.StackSize = MFI.getStackSize();
+ YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment();
+ YamlMFI.MaxAlignment = MFI.getMaxAlignment();
+ YamlMFI.AdjustsStack = MFI.adjustsStack();
+ YamlMFI.HasCalls = MFI.hasCalls();
+ YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize();
+ YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
+ YamlMFI.HasVAStart = MFI.hasVAStart();
+ YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
}
-void MIRPrinter::convert(const Module &M, yaml::MachineBasicBlock &YamlMBB,
+void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
+ const MachineFrameInfo &MFI) {
+ // Process fixed stack objects.
+ unsigned ID = 0;
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ yaml::FixedMachineStackObject YamlObject;
+ YamlObject.ID = ID++;
+ YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
+ ? yaml::FixedMachineStackObject::SpillSlot
+ : yaml::FixedMachineStackObject::DefaultType;
+ YamlObject.Offset = MFI.getObjectOffset(I);
+ YamlObject.Size = MFI.getObjectSize(I);
+ YamlObject.Alignment = MFI.getObjectAlignment(I);
+ YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
+ YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
+ MF.FixedStackObjects.push_back(YamlObject);
+ // TODO: Store the mapping between fixed object IDs and object indices to
+ // print the fixed stack object references correctly.
+ }
+
+ // Process ordinary stack objects.
+ ID = 0;
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) {
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ yaml::MachineStackObject YamlObject;
+ YamlObject.ID = ID++;
+ YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
+ ? yaml::MachineStackObject::SpillSlot
+ : MFI.isVariableSizedObjectIndex(I)
+ ? yaml::MachineStackObject::VariableSized
+ : yaml::MachineStackObject::DefaultType;
+ YamlObject.Offset = MFI.getObjectOffset(I);
+ YamlObject.Size = MFI.getObjectSize(I);
+ YamlObject.Alignment = MFI.getObjectAlignment(I);
+
+ MF.StackObjects.push_back(YamlObject);
+ // TODO: Store the mapping between object IDs and object indices to print
+ // the stack object references correctly.
+ }
+}
+
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineBasicBlock &YamlMBB,
const MachineBasicBlock &MBB) {
assert(MBB.getNumber() >= 0 && "Invalid MBB number");
YamlMBB.ID = (unsigned)MBB.getNumber();
// TODO: Serialize unnamed BB references.
if (const auto *BB = MBB.getBasicBlock())
- YamlMBB.Name = BB->hasName() ? BB->getName() : "<unnamed bb>";
+ YamlMBB.Name.Value = BB->hasName() ? BB->getName() : "<unnamed bb>";
else
- YamlMBB.Name = "";
+ YamlMBB.Name.Value = "";
YamlMBB.Alignment = MBB.getAlignment();
YamlMBB.AddressTaken = MBB.hasAddressTaken();
YamlMBB.IsLandingPad = MBB.isLandingPad();
for (const auto *SuccMBB : MBB.successors()) {
std::string Str;
raw_string_ostream StrOS(Str);
- MIPrinter(M, StrOS, RegisterMaskIds).printMBBReference(*SuccMBB);
+ MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB);
YamlMBB.Successors.push_back(StrOS.str());
}
-
+ // Print the live in registers.
+ const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) {
+ std::string Str;
+ raw_string_ostream StrOS(Str);
+ printReg(*I, StrOS, TRI);
+ YamlMBB.LiveIns.push_back(StrOS.str());
+ }
// Print the machine instructions.
YamlMBB.Instructions.reserve(MBB.size());
std::string Str;
for (const auto &MI : MBB) {
raw_string_ostream StrOS(Str);
- MIPrinter(M, StrOS, RegisterMaskIds).print(MI);
+ MIPrinter(StrOS, MST, RegisterMaskIds).print(MI);
YamlMBB.Instructions.push_back(StrOS.str());
Str.clear();
}
@@ -188,18 +292,6 @@ void MIPrinter::print(const MachineInstr &MI) {
}
}
-static void printReg(unsigned Reg, raw_ostream &OS,
- const TargetRegisterInfo *TRI) {
- // TODO: Print Stack Slots.
- // TODO: Print virtual registers.
- if (!Reg)
- OS << '_';
- else if (Reg < TRI->getNumRegs())
- OS << '%' << StringRef(TRI->getName(Reg)).lower();
- else
- llvm_unreachable("Can't print this kind of register yet");
-}
-
void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
OS << "%bb." << MBB.getNumber();
if (const auto *BB = MBB.getBasicBlock()) {
@@ -211,9 +303,19 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
switch (Op.getType()) {
case MachineOperand::MO_Register:
- // TODO: Print register flags.
+ // TODO: Print the other register flags.
+ if (Op.isImplicit())
+ OS << (Op.isDef() ? "implicit-def " : "implicit ");
+ if (Op.isDead())
+ OS << "dead ";
+ if (Op.isKill())
+ OS << "killed ";
+ if (Op.isUndef())
+ OS << "undef ";
printReg(Op.getReg(), OS, TRI);
- // TODO: Print sub register.
+ // Print the sub register.
+ if (Op.getSubReg() != 0)
+ OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
break;
case MachineOperand::MO_Immediate:
OS << Op.getImm();
@@ -222,10 +324,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
printMBBReference(*Op.getMBB());
break;
case MachineOperand::MO_GlobalAddress:
- // FIXME: Make this faster - print as operand will create a slot tracker to
- // print unnamed values for the whole module every time it's called, which
- // is inefficient.
- Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, &M);
+ Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
// TODO: Print offset and target flags.
break;
case MachineOperand::MO_RegisterMask: {
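The MIRPrinter changes above build one ModuleSlotTracker per machine function and hand it to every printAsOperand() call, which is what the removed FIXME asked for. The same idea as a stand-alone sketch (the function name and parameters are illustrative):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

static void printValuesAsOperands(const llvm::Module &M,
                                  llvm::ArrayRef<const llvm::Value *> Values,
                                  llvm::raw_ostream &OS) {
  llvm::ModuleSlotTracker MST(&M); // slot numbering computed once
  for (const llvm::Value *V : Values) {
    V->printAsOperand(OS, /*PrintType=*/false, MST); // reused for each value
    OS << '\n';
  }
}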
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 467a2e4eb428..3f04bb0b532b 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -19,8 +19,8 @@
using namespace llvm;
namespace llvm {
-TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
-TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+template class DomTreeNodeBase<MachineBasicBlock>;
+template class DominatorTreeBase<MachineBasicBlock>;
}
char MachineDominatorTree::ID = 0;
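The MachineDominators change above drops the old TEMPLATE_INSTANTIATION macro in favour of the plain C++ spelling. For reference, a minimal self-contained illustration of explicit instantiation (the class here is a made-up stand-in, not the real DomTreeNodeBase):

// One translation unit provides the explicit instantiation *definition*,
// forcing all members of the specialization to be emitted there.
template <typename NodeT> class ExampleTreeNode {
public:
  NodeT *Node = nullptr;
};

template class ExampleTreeNode<int>; // definition, in exactly one .cpp file

// Headers or other translation units can then declare (C++11):
//   extern template class ExampleTreeNode<int>;
// to suppress redundant implicit instantiations.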
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 800d1b5bd57d..9856e70edaef 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -74,7 +75,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
if (Fn->hasFnAttribute(Attribute::StackAlignment))
FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
- ConstantPool = new (Allocator) MachineConstantPool(TM);
+ ConstantPool = new (Allocator) MachineConstantPool(getDataLayout());
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
@@ -118,6 +119,10 @@ MachineFunction::~MachineFunction() {
}
}
+const DataLayout &MachineFunction::getDataLayout() const {
+ return Fn->getParent()->getDataLayout();
+}
+
/// Get the JumpTableInfo for this function.
/// If it does not already exist, allocate one.
MachineJumpTableInfo *MachineFunction::
@@ -458,12 +463,12 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
/// normal 'L' label is returned.
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
- const DataLayout *DL = getTarget().getDataLayout();
+ const DataLayout &DL = getDataLayout();
assert(JumpTableInfo && "No jump tables");
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
- const char *Prefix = isLinkerPrivate ? DL->getLinkerPrivateGlobalPrefix() :
- DL->getPrivateGlobalPrefix();
+ const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix()
+ : DL.getPrivateGlobalPrefix();
SmallString<60> Name;
raw_svector_ostream(Name)
<< Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
@@ -472,9 +477,9 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
/// Return a function-local symbol to represent the PIC base.
MCSymbol *MachineFunction::getPICBaseSymbol() const {
- const DataLayout *DL = getTarget().getDataLayout();
- return Ctx.getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
- Twine(getFunctionNumber())+"$pb");
+ const DataLayout &DL = getDataLayout();
+ return Ctx.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(getFunctionNumber()) + "$pb");
}
//===----------------------------------------------------------------------===//
@@ -790,10 +795,6 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
void MachineConstantPoolValue::anchor() { }
-const DataLayout *MachineConstantPool::getDataLayout() const {
- return TM.getDataLayout();
-}
-
Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
@@ -851,7 +852,7 @@ MachineConstantPool::~MachineConstantPool() {
/// Test whether the given two constants can be allocated the same constant pool
/// entry.
static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
- const DataLayout *TD) {
+ const DataLayout &DL) {
// Handle the trivial case quickly.
if (A == B) return true;
@@ -865,8 +866,8 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
return false;
// For now, only support constants with the same size.
- uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
- if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128)
+ uint64_t StoreSize = DL.getTypeStoreSize(A->getType());
+ if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128)
return false;
Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
@@ -877,16 +878,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// DataLayout.
if (isa<PointerType>(A->getType()))
A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant *>(A), *TD);
+ const_cast<Constant *>(A), DL);
else if (A->getType() != IntTy)
A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant *>(A), *TD);
+ const_cast<Constant *>(A), DL);
if (isa<PointerType>(B->getType()))
B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant *>(B), *TD);
+ const_cast<Constant *>(B), DL);
else if (B->getType() != IntTy)
B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant *>(B), *TD);
+ const_cast<Constant *>(B), DL);
return A == B;
}
@@ -903,8 +904,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
// FIXME, this could be made much more efficient for large constant pools.
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
if (!Constants[i].isMachineConstantPoolEntry() &&
- CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C,
- getDataLayout())) {
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) {
if ((unsigned)Constants[i].getAlignment() < Alignment)
Constants[i].Alignment = Alignment;
return i;
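The common thread in the MachineFunction.cpp hunks above is that the DataLayout is now obtained from the IR Module through the new MachineFunction::getDataLayout() accessor and passed around by reference, instead of being fetched as a nullable pointer from the TargetMachine. The same pattern recurs below in the SelectionDAG files (DAG.getDataLayout(), TLI.getPointerTy(DL), and so on). A minimal sketch of the resulting call pattern, assuming an in-scope MachineFunction &MF and a Type *Ty:

    // DataLayout now comes from the function's parent Module, by reference.
    const DataLayout &DL = MF.getDataLayout();
    uint64_t StoreSize = DL.getTypeStoreSize(Ty);  // no null check needed
    unsigned PrefAlign = DL.getPrefTypeAlignment(Ty);
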
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 42d0603ab96b..6a206249d834 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -320,7 +320,10 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
const Function *Personality) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
LP.Personality = Personality;
+ addPersonality(Personality);
+}
+void MachineModuleInfo::addPersonality(const Function *Personality) {
for (unsigned i = 0; i < Personalities.size(); ++i)
if (Personalities[i] == Personality)
return;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 278a8f24d63e..5984af87a184 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -28,7 +29,6 @@ MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
TracksSubRegLiveness(false) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());
// Create the physreg use/def lists.
@@ -441,3 +441,49 @@ void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const {
UseMI->getOperand(0).setReg(0U);
}
}
+
+static const Function *getCalledFunction(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isGlobal())
+ continue;
+ const Function *Func = dyn_cast<Function>(MO.getGlobal());
+ if (Func != nullptr)
+ return Func;
+ }
+ return nullptr;
+}
+
+static bool isNoReturnDef(const MachineOperand &MO) {
+ // Anything which is not a noreturn function is a real def.
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.isCall())
+ return false;
+ const MachineBasicBlock &MBB = *MI.getParent();
+ if (!MBB.succ_empty())
+ return false;
+ const MachineFunction &MF = *MBB.getParent();
+ // We need to keep correct unwind information even if the function will
+ // not return, since the runtime may need it.
+ if (MF.getFunction()->hasFnAttribute(Attribute::UWTable))
+ return false;
+ const Function *Called = getCalledFunction(MI);
+ if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn)
+ || !Called->hasFnAttribute(Attribute::NoUnwind))
+ return false;
+
+ return true;
+}
+
+bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const {
+ if (UsedPhysRegMask.test(PhysReg))
+ return true;
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) {
+ for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) {
+ if (isNoReturnDef(MO))
+ continue;
+ return true;
+ }
+ }
+ return false;
+}
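isPhysRegModified() added above answers "is this physical register (or any alias of it) defined anywhere in the function?", with one deliberate exception: defs that only occur on calls to noreturn+nounwind functions in blocks with no successors are ignored (unless the caller needs a uwtable), since such clobbers can never be observed on a normal return path. One plausible caller, sketched here as an assumption rather than code from this commit, is a target's callee-saved-register determination:

    // Hedged sketch: mark the callee-saved registers this function clobbers.
    BitVector SavedRegs(TRI->getNumRegs());
    for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); *CSR; ++CSR)
      if (MF.getRegInfo().isPhysRegModified(*CSR))
        SavedRegs.set(*CSR);
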
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index f9adba0b35c4..9404c687d410 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -509,18 +509,17 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
<< " height.\n");
// Find any MBB predecessors that have MBB as their preferred successor.
// They are the only ones that need to be invalidated.
- for (MachineBasicBlock::const_pred_iterator
- I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
- TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
if (!TBI.hasValidHeight())
continue;
if (TBI.Succ == MBB) {
TBI.invalidateHeight();
- WorkList.push_back(*I);
+ WorkList.push_back(Pred);
continue;
}
      // Verify that TBI.Succ is actually a Pred successor.
- assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
}
} while (!WorkList.empty());
}
@@ -535,18 +534,17 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
<< " depth.\n");
// Find any MBB successors that have MBB as their preferred predecessor.
// They are the only ones that need to be invalidated.
- for (MachineBasicBlock::const_succ_iterator
- I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
- TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
if (!TBI.hasValidDepth())
continue;
if (TBI.Pred == MBB) {
TBI.invalidateDepth();
- WorkList.push_back(*I);
+ WorkList.push_back(Succ);
continue;
}
      // Verify that TBI.Pred is actually a Succ predecessor.
- assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
}
} while (!WorkList.empty());
}
@@ -998,8 +996,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// MBB is the highest precomputed block in the trace.
if (MBB) {
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
- for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
- LiveInReg LI = TBI.LiveIns[i];
+ for (LiveInReg &LI : TBI.LiveIns) {
if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
// For virtual registers, the def latency is included.
unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
@@ -1131,11 +1128,16 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
- // FIXME: Check cache tags, recompute as needed.
- computeTrace(MBB);
- computeInstrDepths(MBB);
- computeInstrHeights(MBB);
- return Trace(*this, BlockInfo[MBB->getNumber()]);
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+
+ if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
+ computeTrace(MBB);
+ if (!TBI.HasValidInstrDepths)
+ computeInstrDepths(MBB);
+ if (!TBI.HasValidInstrHeights)
+ computeInstrHeights(MBB);
+
+ return Trace(*this, TBI);
}
unsigned
@@ -1204,8 +1206,7 @@ unsigned MachineTraceMetrics::Trace::getResourceLength(
unsigned ResourceIdx)
->unsigned {
unsigned Cycles = 0;
- for (unsigned I = 0; I != Instrs.size(); ++I) {
- const MCSchedClassDesc *SC = Instrs[I];
+ for (const MCSchedClassDesc *SC : Instrs) {
if (!SC->isValid())
continue;
for (TargetSchedModel::ProcResIter
@@ -1223,8 +1224,8 @@ unsigned MachineTraceMetrics::Trace::getResourceLength(
for (unsigned K = 0; K != PRDepths.size(); ++K) {
unsigned PRCycles = PRDepths[K] + PRHeights[K];
- for (unsigned I = 0; I != Extrablocks.size(); ++I)
- PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
PRCycles += extraCycles(ExtraInstrs, K);
PRCycles -= extraCycles(RemoveInstrs, K);
PRMax = std::max(PRMax, PRCycles);
@@ -1235,8 +1236,8 @@ unsigned MachineTraceMetrics::Trace::getResourceLength(
// Instrs: #instructions in current trace outside current block.
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
// Add instruction count from the extra blocks.
- for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
- Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ Instrs += TE.MTM.getResources(MBB)->InstrCount;
Instrs += ExtraInstrs.size();
Instrs -= RemoveInstrs.size();
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 210a7a1649cd..024d166a4987 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -214,10 +214,10 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr),
- Started(true), Stopped(false), AddingMachinePasses(false), TM(tm),
- Impl(nullptr), Initialized(false), DisableVerify(false),
- EnableTailMerge(true), EnableShrinkWrap(false) {
+ : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr),
+ StopAfter(nullptr), Started(true), Stopped(false),
+ AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
+ DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) {
Impl = new PassConfigImpl();
@@ -288,6 +288,8 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
// and shouldn't reference it.
AnalysisID PassID = P->getPassID();
+ if (StartBefore == PassID)
+ Started = true;
if (Started && !Stopped) {
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
@@ -422,7 +424,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// removed from the parent invoke(s). This could happen when a landing
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
- addPass(createSjLjEHPreparePass(TM));
+ addPass(createSjLjEHPreparePass());
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 76583f0de888..b2fdee6c8e4c 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -82,7 +82,8 @@ private:
void calculateSets(MachineFunction &Fn);
void calculateCallsInformation(MachineFunction &Fn);
- void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void assignCalleeSavedSpillSlots(MachineFunction &Fn,
+ const BitVector &SavedRegs);
void insertCSRSpillsAndRestores(MachineFunction &Fn);
void calculateFrameObjectOffsets(MachineFunction &Fn);
void replaceFrameIndices(MachineFunction &Fn);
@@ -92,7 +93,7 @@ private:
void insertPrologEpilogCode(MachineFunction &Fn);
// Convenience for recognizing return blocks.
- bool isReturnBlock(MachineBasicBlock *MBB);
+ bool isReturnBlock(const MachineBasicBlock *MBB) const;
};
} // namespace
@@ -127,7 +128,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const {
return (MBB && !MBB->empty() && MBB->back().isReturn());
}
@@ -143,7 +144,12 @@ void PEI::calculateSets(MachineFunction &Fn) {
if (MFI->getSavePoint()) {
SaveBlock = MFI->getSavePoint();
assert(MFI->getRestorePoint() && "Both restore and save must be set");
- RestoreBlocks.push_back(MFI->getRestorePoint());
+ MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock))
+ RestoreBlocks.push_back(RestoreBlock);
return;
}
@@ -178,13 +184,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// instructions.
calculateCallsInformation(Fn);
- // Allow the target machine to make some adjustments to the function
- // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
- TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSaves(Fn, SavedRegs, RS);
- // Scan the function for modified callee saved registers and insert spill code
- // for any callee saved registers that are modified.
- calculateCalleeSavedRegisters(Fn);
+ // Insert spill code for any callee saved registers that are modified.
+ assignCalleeSavedSpillSlots(Fn, SavedRegs);
// Determine placement of CSR spill/restore code:
// place all spills in the entry block, all restores in return blocks.
@@ -290,39 +295,27 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
}
}
-
-/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
-/// registers.
-void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
- const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
- const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
- MachineFrameInfo *MFI = F.getFrameInfo();
-
- // Get the callee saved register list...
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
-
+void PEI::assignCalleeSavedSpillSlots(MachineFunction &F,
+ const BitVector &SavedRegs) {
+ // These are used to keep track of the callee-save area. Initialize them.
MinCSFrameIndex = INT_MAX;
MaxCSFrameIndex = 0;
- // Early exit for targets which have no callee saved registers.
- if (!CSRegs || CSRegs[0] == 0)
+ if (SavedRegs.empty())
return;
- // In Naked functions we aren't going to save any registers.
- if (F.getFunction()->hasFnAttribute(Attribute::Naked))
- return;
+ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- // Functions which call __builtin_unwind_init get all their registers saved.
- if (F.getRegInfo().isPhysRegUsed(Reg) || F.getMMI().callsUnwindInit()) {
- // If the reg is modified, save it!
+ if (SavedRegs.test(Reg))
CSI.push_back(CalleeSavedInfo(Reg));
- }
}
+ const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
// If target doesn't implement this, use generic code.
@@ -1033,12 +1026,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
- MachineRegisterInfo &MRI = Fn.getRegInfo();
Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
- // Make sure MRI now accounts this register as used.
- MRI.setPhysRegUsed(ScratchReg);
-
// Because this instruction was processed by the RS before this
// register was allocated, make sure that the RS now records the
// register as being used.
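Taken together, the PrologEpilogInserter changes split the old calculateCalleeSavedRegisters() into two steps: the target first decides which registers must be saved, and PEI then only assigns spill slots for that set. The control flow in runOnMachineFunction now reads roughly as follows (condensed from the hunk above, repeated here only to make the division of labour explicit):

    BitVector SavedRegs;
    TFI->determineCalleeSaves(Fn, SavedRegs, RS);  // target: *which* registers
    assignCalleeSavedSpillSlots(Fn, SavedRegs);    // PEI: *where* they are spilled
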
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index fd3d4d78968b..660bb4f0e34d 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -986,10 +986,6 @@ void RAFast::AllocateBasicBlock() {
}
}
- for (UsedInInstrSet::iterator
- I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setRegUnitUsed(*I);
-
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
UsedInInstr.clear();
@@ -1050,10 +1046,6 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
- for (UsedInInstrSet::iterator
- I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setRegUnitUsed(*I);
-
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
Coalesced.push_back(MI);
@@ -1103,12 +1095,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}
- // Add the clobber lists for all the instructions we skipped earlier.
- for (const MCInstrDesc *Desc : SkippedInstrs)
- if (const uint16_t *Defs = Desc->getImplicitDefs())
- while (*Defs)
- MRI->setPhysRegUsed(*Defs++);
-
// All machine operands and other references to virtual registers have been
// replaced. Remove the virtual registers.
MRI->clearVirtRegs();
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 26f42c93323a..7ebcf7f54856 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -400,6 +400,8 @@ private:
typedef SmallVector<HintInfo, 4> HintsInfo;
BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
void collectHintInfo(unsigned, HintsInfo &);
+
+ bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
};
} // end anonymous namespace
@@ -816,6 +818,16 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
}
}
+/// Returns true if the given \p PhysReg is a callee saved register and has not
+/// been used for allocation yet.
+bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
+ unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
+ if (CSR == 0)
+ return false;
+
+ return !Matrix->isPhysRegUsed(PhysReg);
+}
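isUnusedCalleeSavedReg() folds the repeated "is this the first use of a callee-saved register?" check into one helper; note that it now asks the LiveRegMatrix rather than MachineRegisterInfo::isPhysRegUsed, which the rest of this patch removes. The call-site pattern, as used in the hunks that follow (simplified):

    // Avoid bringing a fresh CSR into play when the eviction cost is capped.
    if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg))
      continue;
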
+
/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
@@ -861,13 +873,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
continue;
// The first use of a callee-saved register in a function has cost 1.
// Don't start using a CSR when the CostPerUseLimit is low.
- if (CostPerUseLimit == 1)
- if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
- if (!MRI->isPhysRegUsed(CSR)) {
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
- << PrintReg(CSR, TRI) << '\n');
- continue;
- }
+ if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
+ << '\n');
+ continue;
+ }
if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
continue;
@@ -1348,9 +1359,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
unsigned BestCand = NoCand;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
- if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
- if (IgnoreCSR && !MRI->isPhysRegUsed(CSR))
- continue;
+ if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
+ continue;
// Discard bad candidates before we run out of interference cache cursors.
// This will only affect register classes with a lot of registers (>32).
@@ -2134,7 +2144,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
unsigned ItVirtReg = (*It)->reg;
if (VRM->hasPhys(ItVirtReg))
Matrix->unassign(**It);
- Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]);
+ unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg];
+ Matrix->assign(**It, ItPhysReg);
}
}
@@ -2441,16 +2452,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
- // We check other options if we are using a CSR for the first time.
- bool CSRFirstUse = false;
- if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
- if (!MRI->isPhysRegUsed(CSR))
- CSRFirstUse = true;
-
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
- if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) {
+ if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
+ NewVRegs.empty()) {
unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
CostPerUseLimit, NewVRegs);
if (CSRReg || !NewVRegs.empty())
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 450a3051c6ff..c3786e552a13 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -77,6 +77,16 @@ void RegPressureTracker::dump() const {
P.dump(TRI);
}
+void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
+ for (const PressureChange &Change : *this) {
+ if (!Change.isValid() || Change.getUnitInc() == 0)
+ continue;
+ dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet())
+ << " " << Change.getUnitInc();
+ }
+ dbgs() << '\n';
+}
+
/// Increase the current pressure as impacted by these registers and bump
/// the high water mark if needed.
void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
@@ -787,6 +797,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff,
RegPressureDelta Delta2;
getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit);
if (Delta != Delta2) {
+ dbgs() << "PDiff: ";
+ PDiff->dump(*TRI);
dbgs() << "DELTA: " << *MI;
if (Delta.Excess.isValid())
dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet())
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6056d93ddc7a..52d620b1d540 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -443,8 +443,9 @@ namespace {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
- : TLI.getPointerTy();
+ auto &DL = DAG.getDataLayout();
+ return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
+ : TLI.getPointerTy(DL);
}
/// This method returns true if we are running before type legalization or
@@ -456,7 +457,7 @@ namespace {
/// Convenience wrapper around TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
- return TLI.getSetCCResultType(*DAG.getContext(), VT);
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
};
}
@@ -3111,7 +3112,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// For big endian targets, we need to add an offset to the pointer
// to load the correct bytes. For little endian systems, we merely
// need to read fewer bytes from the same pointer.
- if (TLI.isBigEndian()) {
+ if (DAG.getDataLayout().isBigEndian()) {
unsigned LVTStoreBytes = LoadedVT.getStoreSize();
unsigned EVTStoreBytes = ExtVT.getStoreSize();
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
@@ -6675,7 +6676,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
- if (TLI.isBigEndian()) {
+ if (DAG.getDataLayout().isBigEndian()) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
@@ -6873,7 +6874,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- bool isLE = TLI.isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -6926,7 +6927,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- EVT IndexTy = TLI.getVectorIdxTy();
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
@@ -7093,8 +7094,8 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
!LD2->isVolatile() &&
DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
- unsigned NewAlign = TLI.getDataLayout()->
- getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
@@ -7150,13 +7151,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
// Do not remove the cast if the types differ in endian layout.
- TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
- TLI.hasBigEndianPartOrdering(VT) &&
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned Align = TLI.getDataLayout()->
- getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned Align = DAG.getDataLayout().getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
if (Align <= OrigAlign) {
@@ -7368,7 +7369,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e;
i += NumInputsPerOutput) {
- bool isLE = TLI.isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
APInt NewBits = APInt(DstBitSize, 0);
bool EltIsUndef = true;
for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
@@ -7415,7 +7416,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
}
// For big endian targets, swap the order of the pieces of each element.
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
@@ -8373,6 +8374,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (TLI.combineRepeatedFPDivisors(Users.size())) {
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ // FIXME: This optimization requires some level of fast-math, so the
+ // created reciprocal node should at least have the 'allowReciprocal'
+ // fast-math-flag set.
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
// Dividend / Divisor -> Dividend * Reciprocal
@@ -8381,10 +8385,14 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (Dividend != FPOne) {
SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
Reciprocal);
- DAG.ReplaceAllUsesWith(U, NewNode.getNode());
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
}
}
- return SDValue();
+ return SDValue(N, 0); // N was replaced.
}
}
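For context on the FDIV hunk above: when several nodes divide by the same divisor, the combiner builds a single Reciprocal = 1.0 / Divisor and turns every Dividend / Divisor into Dividend * Reciprocal, trading N divisions for one division plus N multiplies. That is only value-preserving under fast-math, hence the FIXME about the missing 'allowReciprocal' flag. A scalar analogue of the transform (a sketch, not code from this patch):

    // Fast-math only: results may differ in the last ulp from dividing directly.
    float r = 1.0f / b;            // one expensive division, shared
    float x = a * r, y = c * r;    // each former a/b, c/b becomes a multiply
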
@@ -8406,30 +8414,29 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (DAG.getTarget().Options.UnsafeFPMath &&
- !TLI.isFsqrtCheap()) {
- // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
- if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
- EVT VT = RV.getValueType();
- SDLoc DL(N);
- RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
- AddToWorklist(RV.getNode());
+ if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
+ return SDValue();
- // Unfortunately, RV is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- SDValue ZeroCmp =
- DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT),
- N->getOperand(0), Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
- AddToWorklist(RV.getNode());
+ // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
+ SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+ if (!RV)
+ return SDValue();
+
+ EVT VT = RV.getValueType();
+ SDLoc DL(N);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+ AddToWorklist(RV.getNode());
- RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
- DL, VT, ZeroCmp, Zero, RV);
- return RV;
- }
- }
- return SDValue();
+ // Unfortunately, RV is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+ AddToWorklist(RV.getNode());
+
+ return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ ZeroCmp, Zero, RV);
}
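The rewritten visitFSQRT keeps the same transform as before, just with early exits: under unsafe FP math, sqrt(x) is computed as x * rsqrt(x) using the target's reciprocal-square-root estimate, and because rsqrt(0) is +inf the product would be 0 * inf = NaN at x == 0, so a select forces that case back to 0. A scalar sketch of the idea (rsqrt_estimate is a hypothetical stand-in for the target-provided estimate):

    float r = rsqrt_estimate(x);           // approximate 1/sqrt(x)
    float s = x * r;                       // approximate sqrt(x)
    float result = (x == 0.0f) ? 0.0f : s; // select out the 0 * inf = NaN case
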
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
@@ -9144,7 +9151,8 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
} else
return false;
- return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()), AS);
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
}
/// Try turning a load/store into a pre-indexed load/store when the base
@@ -9869,8 +9877,7 @@ struct LoadedSlice {
/// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
assert(DAG && "Missing context.");
- bool IsBigEndian =
- DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
+ bool IsBigEndian = DAG->getDataLayout().isBigEndian();
assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
uint64_t Offset = Shift / 8;
unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
@@ -9953,7 +9960,7 @@ struct LoadedSlice {
// Check if it will be merged with the load.
// 1. Check the alignment constraint.
- unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
+ unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
ResVT.getTypeForEVT(*DAG->getContext()));
if (RequiredAlignment > getAlignment())
@@ -10321,7 +10328,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
unsigned StOffset;
unsigned NewAlign = St->getAlignment();
- if (DAG.getTargetLoweringInfo().isLittleEndian())
+ if (DAG.getDataLayout().isLittleEndian())
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
@@ -10434,12 +10441,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
+ if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
@@ -10503,7 +10510,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
unsigned LDAlign = LD->getAlignment();
unsigned STAlign = ST->getAlignment();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
+ unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
@@ -10685,7 +10692,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// Construct a single integer constant which is made of the smaller
// constant inputs.
- bool IsLE = TLI.isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
for (unsigned i = 0; i < NumElem ; ++i) {
unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
@@ -10743,7 +10750,7 @@ static bool allowableAlignment(const SelectionDAG &DAG,
return true;
Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+ unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
return (Align >= ABIAlignment);
}
@@ -11205,8 +11212,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
- unsigned Align = TLI.getDataLayout()->
- getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+ unsigned Align = DAG.getDataLayout().getABITypeAlignment(
+ SVT.getTypeForEVT(*DAG.getContext()));
if (Align <= OrigAlign &&
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
@@ -11265,7 +11272,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
- if (TLI.isBigEndian()) std::swap(Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
@@ -11514,7 +11522,7 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
unsigned Align = OriginalLoad->getAlignment();
- unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
VecEltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
@@ -11648,7 +11656,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// scalar_to_vector here as well.
if (!LegalOperations) {
- EVT IndexTy = TLI.getVectorIdxTy();
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
}
@@ -11825,7 +11833,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!ValidTypes)
return SDValue();
- bool isLE = TLI.isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
@@ -12079,10 +12087,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy()));
- VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+ VecIn2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(VT.getVectorNumElements(), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ VecIn1 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
} else
return SDValue();
}
@@ -13354,12 +13365,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
Type *FPTy = Elts[0]->getType();
- const DataLayout &TD = *TLI.getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
- SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
- TD.getPrefTypeAlignment(FPTy));
+ SDValue CPIdx =
+ DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlignment(FPTy));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
// Get the offsets to the 0 and 1 element of the array so that we can
@@ -13832,6 +13844,15 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// If they are both volatile then they cannot be reordered.
if (Op0->isVolatile() && Op1->isVolatile()) return true;
+ // If one operation reads from invariant memory and the other may store, they
+ // cannot alias. This should really check the equivalent of mayWrite, but it
+ // only matters for memory nodes other than load/store.
+ if (Op0->isInvariant() && Op1->writeMem())
+ return false;
+
+ if (Op1->isInvariant() && Op0->writeMem())
+ return false;
+
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 5452b1721bb4..2b9ba2c1b534 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -166,7 +166,7 @@ bool FastISel::hasTrivialKill(const Value *V) {
}
unsigned FastISel::getRegForValue(const Value *V) {
- EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
if (!RealVT.isSimple())
return 0;
@@ -228,7 +228,7 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
if (!Reg) {
// Try to emit the constant by using an integer constant with a cast.
const APFloat &Flt = CF->getValueAPF();
- EVT IntVT = TLI.getPointerTy();
+ EVT IntVT = TLI.getPointerTy(DL);
uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
@@ -321,7 +321,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
bool IdxNIsKill = hasTrivialKill(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend it.
- MVT PtrVT = TLI.getPointerTy();
+ MVT PtrVT = TLI.getPointerTy(DL);
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT)) {
IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
@@ -493,7 +493,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
// FIXME: What's a good SWAG number for MaxOffs?
uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
- MVT VT = TLI.getPointerTy();
+ MVT VT = TLI.getPointerTy(DL);
for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1,
E = I->op_end();
OI != E; ++OI) {
@@ -908,10 +908,10 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// Handle the incoming return values from the call.
CLI.clearIns();
SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(TLI, CLI.RetTy, RetTys);
+ ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI);
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
bool CanLowerReturn = TLI.CanLowerReturn(
CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
@@ -976,7 +976,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// not there, but there are cases it cannot get right.
unsigned FrameAlign = Arg.Alignment;
if (!FrameAlign)
- FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
Flags.setByValSize(FrameSize);
Flags.setByValAlign(FrameAlign);
}
@@ -1245,8 +1245,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
}
bool FastISel::selectCast(const User *I, unsigned Opcode) {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
!DstVT.isSimple())
@@ -1288,8 +1288,8 @@ bool FastISel::selectBitCast(const User *I) {
}
// Bitcasts of other values become reg-reg copies or BITCAST operators.
- EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstEVT = TLI.getValueType(I->getType());
+ EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(DL, I->getType());
if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
!TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
// Unhandled type. Halt "fast" selection and bail.
@@ -1413,7 +1413,7 @@ bool FastISel::selectFNeg(const User *I) {
bool OpRegIsKill = hasTrivialKill(I);
// If the target has ISD::FNEG, use it.
- EVT VT = TLI.getValueType(I->getType());
+ EVT VT = TLI.getValueType(DL, I->getType());
unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
OpReg, OpRegIsKill);
if (ResultReg) {
@@ -1456,7 +1456,7 @@ bool FastISel::selectExtractValue(const User *U) {
// Make sure we only try to handle extracts with a legal result. But also
// allow i1 because it's easy.
- EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+ EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true);
if (!RealVT.isSimple())
return false;
MVT VT = RealVT.getSimpleVT();
@@ -1480,7 +1480,7 @@ bool FastISel::selectExtractValue(const User *U) {
unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
SmallVector<EVT, 4> AggValueVTs;
- ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ ComputeValueVTs(TLI, DL, AggTy, AggValueVTs);
for (unsigned i = 0; i < VTIndex; i++)
ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
@@ -1582,8 +1582,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (DstVT.bitsGT(SrcVT))
return selectCast(I, ISD::ZERO_EXTEND);
if (DstVT.bitsLT(SrcVT))
@@ -1612,7 +1612,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
bool SkipTargetIndependentISel)
: FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
- TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()),
+ TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
@@ -2037,7 +2037,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// own moves. Second, this check is necessary because FastISel doesn't
// use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
- EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
// Handle integer promotions, though, because they're common and easy.
if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index ecaa2c972719..cc306cbf5ae4 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -90,7 +90,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI);
+ GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
+ mf.getDataLayout());
CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
Fn->isVarArg(), Outs, Fn->getContext());
@@ -106,9 +107,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (AI->isStaticAlloca()) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
Type *Ty = AI->getAllocatedType();
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
+ uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
+ std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
AI->getAlignment());
TySize *= CUI->getZExtValue(); // Get total allocated size.
@@ -118,10 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
} else {
- unsigned Align = std::max(
- (unsigned)TLI->getDataLayout()->getPrefTypeAlignment(
- AI->getAllocatedType()),
- AI->getAlignment());
+ unsigned Align =
+ std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(
+ AI->getAllocatedType()),
+ AI->getAlignment());
unsigned StackAlign =
MF->getSubtarget().getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
@@ -138,7 +139,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
- TLI->ParseConstraints(TRI, CS);
+ TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS);
for (size_t I = 0, E = Ops.size(); I != E; ++I) {
TargetLowering::AsmOperandInfo &Op = Ops[I];
if (Op.Type == InlineAsm::isClobber) {
@@ -148,7 +149,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
Op.ConstraintVT);
if (PhysReg.first == SP)
- MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
+ MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
}
}
}
@@ -236,7 +237,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
assert(PHIReg && "PHI node does not have an assigned virtual register!");
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
@@ -366,7 +367,7 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, Ty, ValueVTs);
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
unsigned FirstReg = 0;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
@@ -413,7 +414,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
return;
SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(*TLI, Ty, ValueVTs);
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
assert(ValueVTs.size() == 1 &&
"PHIs with non-vector integer types should have a single VT.");
EVT IntVT = ValueVTs[0];
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 42595cb010c2..5ec10308dc28 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type);
+ Align = MF->getDataLayout().getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type);
+ Align = MF->getDataLayout().getTypeAllocSize(Type);
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c0d7871bf08b..21ab07234c81 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -65,7 +65,7 @@ class SelectionDAGLegalize {
SmallSetVector<SDNode *, 16> *UpdatedNodes;
EVT getSetCCResultType(EVT VT) const {
- return TLI.getSetCCResultType(*DAG.getContext(), VT);
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
// Libcall insertion helpers.
@@ -269,7 +269,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
}
}
- SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ SDValue CPIdx =
+ DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend) {
SDValue Result =
@@ -331,7 +332,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Store = DAG.getTruncStore(Chain, dl,
Val, StackPtr, MachinePointerInfo(),
StoredVT, false, false, 0);
- SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS));
+ SDValue Increment = DAG.getConstant(
+ RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout(), AS));
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -385,24 +387,27 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
int IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
- SDValue ShiftAmount = DAG.getConstant(NumBits, dl,
- TLI.getShiftAmountTy(Val.getValueType()));
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType(),
+ DAG.getDataLayout()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
// Store the two parts
SDValue Store1, Store2;
- Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
- ST->getPointerInfo(), NewStoredVT,
+ Store1 = DAG.getTruncStore(Chain, dl,
+ DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
+ Ptr, ST->getPointerInfo(), NewStoredVT,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, TLI.getPointerTy(AS)));
+ DAG.getConstant(IncrementSize, dl,
+ TLI.getPointerTy(DAG.getDataLayout(), AS)));
Alignment = MinAlign(Alignment, IncrementSize);
- Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
- Alignment, ST->getAAInfo());
+ Store2 = DAG.getTruncStore(
+ Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT,
+ ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo());
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -448,7 +453,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Make sure the stack slot is also aligned for the register type.
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
- SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy());
+ SDValue Increment =
+ DAG.getConstant(RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout()));
SmallVector<SDValue, 8> Stores;
SDValue StackPtr = StackBase;
unsigned Offset = 0;
@@ -522,7 +528,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Load the value in two parts
SDValue Lo, Hi;
- if (TLI.isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(), Alignment,
@@ -549,8 +555,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
}
// aggregate the two parts
- SDValue ShiftAmount = DAG.getConstant(NumBits, dl,
- TLI.getShiftAmountTy(Hi.getValueType()));
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType(),
+ DAG.getDataLayout()));
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
@@ -581,7 +588,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
EVT VT = Tmp1.getValueType();
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = Tmp3.getValueType();
- EVT PtrVT = TLI.getPointerTy();
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue StackPtr = DAG.CreateStackTemporary(VT);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -677,7 +684,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32);
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32);
- if (TLI.isBigEndian()) std::swap(Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment, AAInfo);
@@ -724,7 +732,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Align = ST->getAlignment();
if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
if (Align < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
@@ -756,6 +764,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
+ auto &DL = DAG.getDataLayout();
if (StWidth != StVT.getStoreSizeInBits()) {
// Promote to a byte-sized store with upper bits zero if not
@@ -782,7 +791,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Lo, Hi;
unsigned IncrementSize;
- if (TLI.isLittleEndian()) {
+ if (DL.isLittleEndian()) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
@@ -795,9 +804,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl,
Ptr.getValueType()));
- Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(RoundWidth, dl,
- TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
@@ -806,9 +816,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
// Store the top RoundWidth bits.
- Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(ExtraWidth, dl,
- TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
RoundVT, isVolatile, isNonTemporal, Alignment,
AAInfo);
@@ -838,7 +849,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// expand it.
if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment = DL.getABITypeAlignment(Ty);
if (Align < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
@@ -890,8 +901,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// expand it.
if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getDataLayout()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
if (Align < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
}
@@ -995,8 +1005,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
SDValue Lo, Hi, Ch;
unsigned IncrementSize;
+ auto &DL = DAG.getDataLayout();
- if (TLI.isLittleEndian()) {
+ if (DL.isLittleEndian()) {
// EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Load the bottom RoundWidth bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
@@ -1020,9 +1031,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Hi.getValue(1));
// Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth, dl,
- TLI.getShiftAmountTy(Hi.getValueType())));
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
// Join the hi and lo parts.
Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1051,9 +1063,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Hi.getValue(1));
// Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth, dl,
- TLI.getShiftAmountTy(Hi.getValueType())));
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
// Join the hi and lo parts.
Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1086,7 +1099,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned Align = LD->getAlignment();
if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
if (Align < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
}
@@ -1439,7 +1452,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
- Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
SDValue NewLoad;
@@ -1491,7 +1504,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
- Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
StackPtr);
@@ -1569,15 +1582,16 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
// Convert to an integer with the same sign bit.
SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
} else {
+ auto &DL = DAG.getDataLayout();
// Store the float to memory, then load the sign part out as an integer.
- MVT LoadTy = TLI.getPointerTy();
+ MVT LoadTy = TLI.getPointerTy(DL);
// First create a temporary that is aligned for both the load and store.
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
// Then store the float to it.
SDValue Ch =
DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
false, false, 0);
- if (TLI.isBigEndian()) {
+ if (DL.isBigEndian()) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
@@ -1599,9 +1613,10 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
(FloatVT.getSizeInBits() - 8 * ByteOffset);
assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
if (BitShift)
- SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
- DAG.getConstant(BitShift, dl,
- TLI.getShiftAmountTy(SignBit.getValueType())));
+ SignBit = DAG.getNode(
+ ISD::SHL, dl, LoadTy, SignBit,
+ DAG.getConstant(BitShift, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(), DL)));
}
}
// Now get the sign bit proper, by seeing whether the value is negative.
@@ -1777,9 +1792,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
EVT DestVT,
SDLoc dl) {
// Create the stack frame object.
- unsigned SrcAlign =
- TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
- getTypeForEVT(*DAG.getContext()));
+ unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
+ SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
@@ -1790,7 +1804,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
- unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType);
+ unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType);
// Emit a store to the stack slot. Use a truncstore if the input value is
// larger than DestVT.
@@ -1994,7 +2008,8 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
}
}
Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ SDValue CPIdx =
+ DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
@@ -2058,7 +2073,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
+ TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
@@ -2106,7 +2121,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
+ TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
@@ -2140,7 +2155,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
+ TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
@@ -2277,7 +2292,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
+ TLI.getPointerTy(DAG.getDataLayout()));
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2389,7 +2404,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
+ TLI.getPointerTy(DAG.getDataLayout()));
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2426,7 +2441,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Hi = StackSlot;
SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
StackSlot, WordOff);
- if (TLI.isLittleEndian())
+ if (DAG.getDataLayout().isLittleEndian())
std::swap(Hi, Lo);
// if signed map to unsigned space
@@ -2509,8 +2524,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (!isSigned) {
SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
- SDValue ShiftConst =
- DAG.getConstant(1, dl, TLI.getShiftAmountTy(Op0.getValueType()));
+ SDValue ShiftConst = DAG.getConstant(
+ 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout()));
SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
SDValue AndConst = DAG.getConstant(1, dl, MVT::i64);
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
@@ -2545,7 +2560,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
MVT::i64),
ISD::SETUGE);
SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0);
- EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout());
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
DAG.getConstant(32, dl, SHVT));
@@ -2584,11 +2599,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
}
- if (TLI.isLittleEndian()) FF <<= 32;
+ if (DAG.getDataLayout().isLittleEndian())
+ FF <<= 32;
Constant *FudgeFactor = ConstantInt::get(
Type::getInt64Ty(*DAG.getContext()), FF);
- SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ SDValue CPIdx =
+ DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
Alignment = std::min(Alignment, 4u);
@@ -2699,7 +2716,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
/// Open code the operations for BSWAP of the specified operation.
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
EVT VT = Op.getValueType();
- EVT SHVT = TLI.getShiftAmountTy(VT);
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled Expand type in BSWAP!");
@@ -2756,7 +2773,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy(VT);
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
unsigned Len = VT.getSizeInBits();
assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
@@ -2814,7 +2831,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
//
// Ref: "Hacker's Delight" by Henry Warren
EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy(VT);
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
unsigned len = VT.getSizeInBits();
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
@@ -2903,10 +2920,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TargetLowering::ArgListTy Args;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy()), std::move(Args), 0);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -3002,10 +3021,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If this operation is not supported, lower it to 'abort()' call
TargetLowering::ArgListTy Args;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort", TLI.getPointerTy()),
- std::move(Args), 0);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("abort",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -3028,7 +3049,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// SAR. However, it is doubtful that any exist.
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
- EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
if (VT.isVector())
ShiftAmountTy = VT;
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
@@ -3092,9 +3113,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
- SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
- MachinePointerInfo(V),
- false, false, false, 0);
+ SDValue VAListLoad =
+ DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -3111,10 +3132,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(TLI.getDataLayout()->
- getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
- dl,
- VAList.getValueType()));
+ DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(
+ VT.getTypeForEVT(*DAG.getContext())),
+ dl, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
MachinePointerInfo(V), false, false, 0);
@@ -3129,9 +3149,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// output, returning the chain.
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
- Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(2), MachinePointerInfo(VS),
- false, false, false, 0);
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl,
+ Node->getOperand(0), Node->getOperand(2),
+ MachinePointerInfo(VS), false, false, false, 0);
Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
@@ -3226,14 +3246,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
unsigned Idx = Mask[i];
if (Idx < NumElems)
- Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Op0,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())));
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
else
- Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Op1,
- DAG.getConstant(Idx - NumElems, dl,
- TLI.getVectorIdxTy())));
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
+ DAG.getConstant(Idx - NumElems, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
@@ -3247,8 +3267,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
// 1 -> Hi
Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
- DAG.getConstant(OpTy.getSizeInBits()/2, dl,
- TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
+ DAG.getConstant(OpTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(
+ Node->getOperand(0).getValueType(),
+ DAG.getDataLayout())));
Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
} else {
// 0 -> Lo
@@ -3646,8 +3668,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
- SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl,
- TLI.getShiftAmountTy(HalfType));
+ SDValue Shift =
+ DAG.getConstant(HalfType.getSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
break;
@@ -3759,12 +3782,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// The high part is obtained by SRA'ing all but one of the bits of low
// part.
unsigned LoSize = VT.getSizeInBits();
- SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy()));
- SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy()));
+ SDValue HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ SDValue HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
// Here we're passing the 2 arguments explicitly as 4 arguments that are
// pre-lowered to the correct types. This all depends upon WideVT not
@@ -3785,8 +3810,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (isSigned) {
- Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, dl,
- TLI.getShiftAmountTy(BottomHalf.getValueType()));
+ Tmp1 = DAG.getConstant(
+ VT.getSizeInBits() - 1, dl,
+ TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1,
ISD::SETNE);
@@ -3802,9 +3828,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT PairTy = Node->getValueType(0);
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
- Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
- DAG.getConstant(PairTy.getSizeInBits()/2, dl,
- TLI.getShiftAmountTy(PairTy)));
+ Tmp2 = DAG.getNode(
+ ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(PairTy, DAG.getDataLayout())));
Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
break;
}
@@ -3828,9 +3855,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Table = Node->getOperand(1);
SDValue Index = Node->getOperand(2);
- EVT PTy = TLI.getPointerTy();
+ EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
- const DataLayout &TD = *TLI.getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
@@ -3936,7 +3963,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
"Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
"expanded.");
- EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT);
+ EVT CCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
break;
@@ -4036,14 +4064,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Scalars;
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(0),
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
- SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(1),
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Sh = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
VT.getScalarType(), Ex, Sh));
}
@@ -4114,9 +4140,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
- Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
- DAG.getConstant(DiffBits, dl,
- TLI.getShiftAmountTy(NVT)));
+ Tmp1 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
Results.push_back(Tmp1);
break;
}
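An aside on the BSWAP promotion in the PromoteNode hunk above: the promoted byte swap zero-extends, swaps in the wider type, then shifts right by DiffBits. A minimal stand-alone C++ sketch of the same arithmetic (the bswap16_via_i32 helper and the use of __builtin_bswap32 are illustrative choices of mine, not code from the patch):

#include <cstdint>
#include <cstdio>

// Byte-swap an i16 by promoting it to i32, mirroring the DAG sequence
// ZERO_EXTEND -> BSWAP -> SRL(DiffBits) from the hunk above (DiffBits = 32 - 16).
static uint16_t bswap16_via_i32(uint16_t X) {
  uint32_t Wide = X;                          // ISD::ZERO_EXTEND
  uint32_t Swapped = __builtin_bswap32(Wide); // ISD::BSWAP on the promoted type
  return (uint16_t)(Swapped >> 16);           // ISD::SRL by DiffBits
}

int main() {
  printf("%04x\n", bswap16_via_i32(0x1234)); // prints 3412
  return 0;
}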
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 37fdf4453fd4..3c50a4155731 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -218,29 +218,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
unsigned RSize = RVT.getSizeInBits();
// First get the sign bit of second operand.
- SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
- DAG.getConstant(RSize - 1, dl,
- TLI.getShiftAmountTy(RVT)));
+ SDValue SignBit = DAG.getNode(
+ ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
+ TLI.getShiftAmountTy(RVT, DAG.getDataLayout())));
SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
// Shift right or sign-extend it if the two operands have different types.
int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
if (SizeDiff > 0) {
- SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
- DAG.getConstant(SizeDiff, dl,
- TLI.getShiftAmountTy(SignBit.getValueType())));
+ SignBit =
+ DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
} else if (SizeDiff < 0) {
SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
- SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
- DAG.getConstant(-SizeDiff, dl,
- TLI.getShiftAmountTy(SignBit.getValueType())));
+ SignBit =
+ DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
}
// Clear the sign bit of the first operand.
- SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
- DAG.getConstant(LSize - 1, dl,
- TLI.getShiftAmountTy(LVT)));
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
+ TLI.getShiftAmountTy(LVT, DAG.getDataLayout())));
Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
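For context on the SoftenFloatRes_FCOPYSIGN hunk above: the softened lowering extracts the sign bit of the second operand with a shift, clears the sign bit of the first with a mask, and (just past the lines shown in the hunk) ORs the two together. A small stand-alone sketch of that bit manipulation for two operands of equal width, so the SizeDiff shifts drop out; the copysign_bits name is mine, not from the patch:

#include <cstdint>
#include <cstring>
#include <cstdio>

// copysign on 'float' done purely with integer bit operations, the same idea
// the softened FCOPYSIGN expansion uses once both operands are integers.
static float copysign_bits(float Mag, float Sgn) {
  uint32_t L, R;
  std::memcpy(&L, &Mag, sizeof L);
  std::memcpy(&R, &Sgn, sizeof R);
  uint32_t SignBit = R & (1u << 31); // sign bit of the second operand: 1 << (RSize - 1)
  uint32_t Mask = (1u << 31) - 1;    // (1 << (LSize - 1)) - 1 clears the first operand's sign
  L = (L & Mask) | SignBit;          // the OR is not shown in the hunk but follows it
  float Out;
  std::memcpy(&Out, &L, sizeof Out);
  return Out;
}

int main() {
  printf("%g %g\n", copysign_bits(3.5f, -0.0f), copysign_bits(-2.0f, 1.0f)); // -3.5 2
  return 0;
}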
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f41202c4f8a4..9f060a09a0f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -282,7 +282,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
Lo = BitConvertToInteger(Lo);
Hi = BitConvertToInteger(Hi);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
@@ -310,8 +310,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT)));
+ return DAG.getNode(
+ ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -799,7 +801,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
}
// Handle endianness of the load.
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts.begin(), Parts.end());
// Assemble the parts in the promoted type.
@@ -809,8 +811,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
// Shift it to the right position and "or" it in.
Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
- DAG.getConstant(i*RegVT.getSizeInBits(), dl,
- TLI.getPointerTy()));
+ DAG.getConstant(i * RegVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
}
@@ -1004,7 +1006,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
DAG.getConstant(OVT.getSizeInBits(), dl,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
}
@@ -1063,7 +1065,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
// Promote the index.
SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N),
- TLI.getVectorIdxTy());
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
N->getOperand(1), Idx), 0);
}
@@ -1356,9 +1358,9 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
return ExpandChainLibCall(LC, Node, false);
}
-/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// N is a shift by a value that needs to be expanded,
/// and the shift amount is a constant 'Amt'. Expand the operation.
-void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
// Expand the incoming operand to be shifted, so that we have its parts
@@ -1379,9 +1381,9 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
EVT ShTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::SHL) {
- if (Amt > VTBits) {
+ if (Amt.ugt(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
- } else if (Amt > NVTBits) {
+ } else if (Amt.ugt(NVTBits)) {
Lo = DAG.getConstant(0, DL, NVT);
Hi = DAG.getNode(ISD::SHL, DL,
NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
@@ -1403,16 +1405,15 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getConstant(NVTBits - Amt, DL, ShTy)));
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
}
return;
}
if (N->getOpcode() == ISD::SRL) {
- if (Amt > VTBits) {
- Lo = DAG.getConstant(0, DL, NVT);
- Hi = DAG.getConstant(0, DL, NVT);
- } else if (Amt > NVTBits) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRL, DL,
NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
Hi = DAG.getConstant(0, DL, NVT);
@@ -1424,19 +1425,19 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(NVTBits - Amt, DL, ShTy)));
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
return;
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
- if (Amt > VTBits) {
+ if (Amt.ugt(VTBits)) {
Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
- } else if (Amt > NVTBits) {
+ } else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(Amt-NVTBits, DL, ShTy));
+ DAG.getConstant(Amt - NVTBits, DL, ShTy));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt == NVTBits) {
@@ -1448,7 +1449,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(NVTBits - Amt, DL, ShTy)));
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
}
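Two things happen in the ExpandShiftByConstant hunks above: the shift amount becomes an APInt, so out-of-range amounts on very wide types are compared with ugt instead of a possibly truncated unsigned, and `NVTBits - Amt` is rewritten as `-Amt + NVTBits`, presumably because APInt's mixed-operand arithmetic keeps the APInt on the left. The expansion itself is the usual double-word shift; a stand-alone sketch of the SHL case with 32-bit halves (the helper name and the 64-bit check in main are mine):

#include <cstdint>
#include <cstdio>

// What the 0 < Amt < NVTBits branch of the SHL case builds, written for a
// 64-bit value held as two 32-bit parts:
//   Lo = InL << Amt
//   Hi = (InH << Amt) | (InL >> (NVTBits - Amt))
static uint64_t shl64_via_parts(uint32_t InL, uint32_t InH, unsigned Amt) {
  uint32_t Lo = InL << Amt;
  uint32_t Hi = (InH << Amt) | (InL >> (32 - Amt));
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  uint64_t V = 0x0123456789abcdefULL;
  uint64_t R = shl64_via_parts((uint32_t)V, (uint32_t)(V >> 32), 12);
  printf("%d\n", R == (V << 12)); // prints 1
  return 0;
}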
@@ -1808,7 +1809,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part replicates the sign bit of Lo, make it explicit.
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(NVTBits - 1, dl, TLI.getPointerTy()));
+ DAG.getConstant(NVTBits - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
}
}
@@ -1975,7 +1977,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// lo part.
unsigned LoSize = Lo.getValueType().getSizeInBits();
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy()));
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
} else if (ExtType == ISD::ZEXTLOAD) {
// The high part is just a zero.
Hi = DAG.getConstant(0, dl, NVT);
@@ -1984,7 +1987,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// The high part is undefined.
Hi = DAG.getUNDEF(NVT);
}
- } else if (TLI.isLittleEndian()) {
+ } else if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, isInvariant, Alignment,
@@ -2039,15 +2042,16 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer low bits from the bottom of Hi to the top of Lo.
- Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
- DAG.getNode(ISD::SHL, dl, NVT, Hi,
- DAG.getConstant(ExcessBits, dl,
- TLI.getPointerTy())));
+ Lo = DAG.getNode(
+ ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
// Move high bits to the right position in Hi.
- Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
- NVT, Hi,
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT,
+ Hi,
DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
}
}
@@ -2173,7 +2177,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// If we can emit an efficient shift operation, do so now. Check to see if
// the RHS is a constant.
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+ return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi);
// If we can determine that the high bit of the shift is zero or one, even if
// the low bits are variable, emit this shift in an optimized form.
@@ -2206,7 +2210,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// have an illegal type. Fix that first by casting the operand, otherwise
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
- EVT ShiftTy = TLI.getShiftAmountTy(VT);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
assert(ShiftTy.getScalarType().getSizeInBits() >=
Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
"ShiftAmountTy is too small to cover the range of this type!");
@@ -2276,8 +2280,9 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
// The high part is obtained by SRA'ing all but one of the bits of low part.
unsigned LoSize = NVT.getSizeInBits();
- Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy()));
+ Hi = DAG.getNode(
+ ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
@@ -2312,7 +2317,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// things like sextinreg V:i64 from i8.
Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. Leave the low part alone,
// sext_inreg the high part.
@@ -2355,10 +2360,10 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
- Hi = DAG.getNode(ISD::SRL, dl,
- N->getOperand(0).getValueType(), N->getOperand(0),
+ Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0),
DAG.getConstant(NVT.getSizeInBits(), dl,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
}
@@ -2414,7 +2419,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
}
Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
- EVT PtrVT = TLI.getPointerTy();
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
// Replace this with a libcall that will check overflow.
@@ -2845,7 +2850,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
Alignment, AAInfo);
}
- if (TLI.isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
@@ -2882,11 +2887,12 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Transfer high bits from the top of Lo to the bottom of Hi.
Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
- TLI.getPointerTy()));
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SRL, dl, NVT, Lo,
- DAG.getConstant(ExcessBits, dl,
- TLI.getPointerTy())));
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(
+ ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
}
// Store both the high bits and maybe some of the low bits.
@@ -2956,14 +2962,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
- SDValue FudgePtr = DAG.getConstantPool(
- ConstantInt::get(*DAG.getContext(), FF.zext(64)),
- TLI.getPointerTy());
+ SDValue FudgePtr =
+ DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy(DAG.getDataLayout()));
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
SDValue Four = DAG.getIntPtrConstant(4, dl);
- if (TLI.isBigEndian()) std::swap(Zero, Four);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Zero, Four);
SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
Zero, Four);
unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
@@ -3113,9 +3120,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
for (unsigned i = 0; i < NumOperands; ++i) {
SDValue Op = N->getOperand(i);
for (unsigned j = 0; j < NumElem; ++j) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- InElemTy, Op, DAG.getConstant(j, dl,
- TLI.getVectorIdxTy()));
+ SDValue Ext = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
}
}
@@ -3142,7 +3149,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
- SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy());
+ SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
V0->getValueType(0).getScalarType(), V0, V1);
@@ -3179,8 +3187,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
for (unsigned i=0; i<NumElem; ++i) {
// Extract element from incoming vector
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
- Incoming, DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
NewOps.push_back(Tr);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 9c297698c1db..a7392fabf1e7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1006,7 +1006,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
SDValue Index) {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
- Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy());
+ Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout()));
// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
@@ -1030,7 +1030,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
DAG.getConstant(LVT.getSizeInBits(), dlHi,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
}
@@ -1079,7 +1079,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
+ TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
@@ -1117,7 +1117,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
DAG.getConstant(LoVT.getSizeInBits(), dl,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2f2778982611..d1131a74cf17 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -73,7 +73,7 @@ private:
}
EVT getSetCCResultType(EVT VT) const {
- return TLI.getSetCCResultType(*DAG.getContext(), VT);
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
/// IgnoreNodeResults - Pretend all of this node's results are legal.
@@ -167,7 +167,7 @@ private:
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
-
+
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
@@ -347,7 +347,7 @@ private:
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 330c31ce0eec..14d8f7762086 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -60,18 +60,20 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case TargetLowering::TypeExpandInteger:
- case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeExpandFloat: {
+ auto &DL = DAG.getDataLayout();
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
- if (TLI.hasBigEndianPartOrdering(InVT) !=
- TLI.hasBigEndianPartOrdering(OutVT))
+ if (TLI.hasBigEndianPartOrdering(InVT, DL) !=
+ TLI.hasBigEndianPartOrdering(OutVT, DL))
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
+ }
case TargetLowering::TypeSplitVector:
GetSplitVector(InOp, Lo, Hi);
- if (TLI.hasBigEndianPartOrdering(OutVT))
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -88,7 +90,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
- if (TLI.hasBigEndianPartOrdering(OutVT))
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -119,9 +121,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
SmallVector<SDValue, 8> Vals;
for (unsigned i = 0; i < NumElems; ++i)
- Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
- CastInOp, DAG.getConstant(i, dl,
- TLI.getVectorIdxTy())));
+ Vals.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
// Build Lo, Hi pair by pairing extracted elements if needed.
unsigned Slot = 0;
@@ -131,7 +133,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHS = Vals[Slot];
SDValue RHS = Vals[Slot + 1];
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(LHS, RHS);
Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
@@ -143,7 +145,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = Vals[Slot++];
Hi = Vals[Slot++];
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
return;
@@ -155,9 +157,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Create the stack frame object. Make sure it is aligned for both
// the source and expanded destination types.
- unsigned Alignment =
- TLI.getDataLayout()->getPrefTypeAlignment(NOutVT.
- getTypeForEVT(*DAG.getContext()));
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(
+ NOutVT.getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
@@ -182,7 +183,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
false, false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
- if (TLI.hasBigEndianPartOrdering(OutVT))
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
}
@@ -241,7 +242,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
DAG.getConstant(1, dl, Idx.getValueType()));
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
}
@@ -282,7 +283,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
Hi.getValue(1));
// Handle endianness of the load.
- if (TLI.hasBigEndianPartOrdering(ValueVT))
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
// Modified the chain - switch anything that used the old chain to use
@@ -302,7 +303,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
// Handle endianness of the load.
- if (TLI.hasBigEndianPartOrdering(OVT))
+ if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
// Modified the chain - switch anything that used the old chain to use
@@ -325,7 +326,7 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
if (NumElements > 1) {
NumElements >>= 1;
SplitInteger(Op, Parts[0], Parts[1]);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Parts[0], Parts[1]);
IntegerToVector(Parts[0], NumElements, Ops, EltVT);
IntegerToVector(Parts[1], NumElements, Ops, EltVT);
@@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Lo, Hi;
GetExpandedOp(N->getOperand(i), Lo, Hi);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
NewElts.push_back(Lo);
NewElts.push_back(Hi);
@@ -431,7 +432,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
SDValue Lo, Hi;
GetExpandedOp(Val, Lo, Hi);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
SDValue Idx = N->getOperand(2);
@@ -481,7 +482,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDValue Lo, Hi;
GetExpandedOp(St->getValue(), Lo, Hi);
- if (TLI.hasBigEndianPartOrdering(ValueVT))
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
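The repeated Lo/Hi swaps in the LegalizeTypesGeneric hunks above come down to part ordering in memory: a little-endian target stores the low half of an expanded value first, a big-endian target stores the high half first, which is why the ordering now comes from the module DataLayout rather than a cached TargetLowering flag. A tiny host-side illustration, not from the patch:

#include <cstdint>
#include <cstring>
#include <cstdio>

// On a little-endian host the low 32-bit part of a 64-bit value is the first
// word in memory; a big-endian host stores the high part first, which is
// exactly the case where the legalizer swaps Lo and Hi above.
int main() {
  uint64_t V = 0x1122334455667788ULL;
  uint32_t Parts[2];
  std::memcpy(Parts, &V, sizeof V);
  printf("%08x %08x\n", Parts[0], Parts[1]); // on x86-64: 55667788 11223344
  return 0;
}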
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ee844a8a4c58..83d4ad5ea1f4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -503,7 +503,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
// Instead, we load all significant words, mask bits off, and concatenate
// them to form each element. Finally, they are extended to destination
// scalar type to build the destination vector.
- EVT WideVT = TLI.getPointerTy();
+ EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
assert(WideVT.isRound() &&
"Could not handle the sophisticated case when the widest integer is"
@@ -563,7 +563,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDValue Lo, Hi, ShAmt;
if (BitOffset < WideBits) {
- ShAmt = DAG.getConstant(BitOffset, dl, TLI.getShiftAmountTy(WideVT));
+ ShAmt = DAG.getConstant(
+ BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
}
@@ -573,8 +574,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
WideIdx++;
BitOffset -= WideBits;
if (BitOffset > 0) {
- ShAmt = DAG.getConstant(SrcEltBits - BitOffset, dl,
- TLI.getShiftAmountTy(WideVT));
+ ShAmt = DAG.getConstant(
+ SrcEltBits - BitOffset, dl,
+ TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
}
@@ -592,8 +594,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
break;
case ISD::SEXTLOAD:
- ShAmt = DAG.getConstant(WideBits - SrcEltBits, dl,
- TLI.getShiftAmountTy(WideVT));
+ ShAmt =
+ DAG.getConstant(WideBits - SrcEltBits, dl,
+ TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
@@ -663,8 +666,9 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
// and save them into memory individually.
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- RegSclVT, Value, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
@@ -803,7 +807,7 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
// Place the extended lanes into the correct locations.
int ExtLaneScale = NumSrcElements / NumElements;
- int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
for (int i = 0; i < NumElements; ++i)
ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
@@ -858,7 +862,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
ShuffleMask.push_back(i);
int ExtLaneScale = NumSrcElements / NumElements;
- int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
for (int i = 0; i < NumElements; ++i)
ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
@@ -995,12 +999,15 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
SDLoc dl(Op);
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
- SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
- SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
Ops[i] = DAG.getNode(ISD::SETCC, dl,
- TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT),
+ TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
DAG.getConstant(APInt::getAllOnesValue
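On the UnrollVSETCC hunk above: the unrolled form extracts each lane, performs a scalar SETCC, and selects all-ones or zero so the result is a lane mask. A stand-alone scalar equivalent; the vector width, element type, and the less-than predicate are arbitrary illustrative choices of mine:

#include <cstdint>
#include <cstdio>

// Per-lane compare producing an all-ones/zero mask, the scalar shape that
// UnrollVSETCC builds with EXTRACT_VECTOR_ELT + SETCC + select.
int main() {
  int32_t LHS[4] = {1, 5, 3, 7};
  int32_t RHS[4] = {4, 4, 4, 4};
  uint32_t Res[4];
  for (int i = 0; i < 4; ++i)
    Res[i] = (LHS[i] < RHS[i]) ? 0xFFFFFFFFu : 0u; // all-ones vs. zero per lane
  for (int i = 0; i < 4; ++i)
    printf("%08x ", Res[i]); // ffffffff 00000000 ffffffff 00000000
  printf("\n");
  return 0;
}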
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 905492c202ca..4348ab79f7d1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -251,8 +251,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
Op = GetScalarizedVector(Op);
} else {
EVT VT = OpVT.getVectorElementType();
- Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
}
@@ -384,10 +385,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
RHS = GetScalarizedVector(RHS);
} else {
EVT VT = OpVT.getVectorElementType();
- LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
- RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
+ LHS = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ RHS = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
// Turn it into a scalar SETCC.
@@ -742,7 +745,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
// expanded pieces.
if (LoVT == HiVT) {
GetExpandedOp(InOp, Lo, Hi);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
@@ -761,12 +764,12 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
// In the general case, convert the input to an integer and split it by hand.
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(LoIntVT, HiIntVT);
SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
@@ -819,7 +822,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
- TLI.getVectorIdxTy()));
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
}
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
@@ -840,7 +843,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// Store the new subvector into the specified index.
SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
false, false, 0);
@@ -898,9 +901,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);
else
- Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
- DAG.getConstant(IdxVal - LoNumElts, dl,
- TLI.getVectorIdxTy()));
+ Hi =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getConstant(IdxVal - LoNumElts, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
return;
}
@@ -919,8 +923,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment =
- TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
false, false, 0);
@@ -1292,10 +1295,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
Idx -= Input * NewElts;
// Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Inputs[Input],
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy())));
+ SVOps.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input],
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
@@ -1472,7 +1474,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
Lo = BitConvertToInteger(Lo);
Hi = BitConvertToInteger(Hi);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
@@ -1763,9 +1765,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
for (const SDValue &Op : N->op_values()) {
for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
i != e; ++i) {
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
- Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy())));
-
+ Elts.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
}
@@ -1829,10 +1831,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
- return IsFloat ?
- DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
- DAG.getTargetConstant(0, DL, TLI.getPointerTy())) :
- DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+ return IsFloat
+ ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
+ DAG.getTargetConstant(
+ 0, DL, TLI.getPointerTy(DAG.getDataLayout())))
+ : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
}
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
@@ -2062,12 +2065,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// }
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy()));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy()));
+ SDValue EOp1 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
Idx += NumElts;
CurNumElts -= NumElts;
@@ -2079,14 +2082,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp1,
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy()));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp2,
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy()));
+ SDValue EOp1 = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp2 = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2);
}
@@ -2123,9 +2124,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
SDValue VecOp = DAG.getUNDEF(NextVT);
unsigned NumToInsert = ConcatEnd - Idx - 1;
for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
- ConcatOps[OpIdx],
- DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
@@ -2211,8 +2212,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
if (InVTNumElts % WidenNumElts == 0) {
- SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
+ SDValue InVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
@@ -2226,8 +2228,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
for (i=0; i < MinElts; ++i) {
- SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy()));
+ SDValue Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
@@ -2453,8 +2456,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (InputWidened)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy()));
+ Ops[Idx++] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
@@ -2511,8 +2515,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
if (InVTNumElts % WidenNumElts == 0) {
// Extract the input and convert the shorten input vector.
- InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+ InOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
}
@@ -2527,8 +2532,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
for (i=0; i < MinElts; ++i) {
- SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
+ SDValue ExtVal = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
}
@@ -2570,8 +2576,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy()));
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(IdxVal + i, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -2872,12 +2880,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
"We can't have the same type as we started with!");
if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
- InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
- DAG.getUNDEF(FixedVT), InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
+ InOp = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
else
- InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
+ InOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
break;
}
}
@@ -2920,10 +2929,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(Opcode, dl, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl,
- TLI.getVectorIdxTy())));
+ Ops[i] = DAG.getNode(
+ Opcode, dl, EltVT,
+ DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
@@ -2943,8 +2953,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+ return DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
}
@@ -2971,8 +2982,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy()));
+ Ops[Idx++] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
@@ -3053,7 +3065,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// Get a new SETCC node to compare the newly widened operands.
// Only some of the compared elements are legal.
- EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType());
+ EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ InOp0.getValueType());
SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
SVT, InOp0, InOp1, N->getOperand(2));
@@ -3061,9 +3074,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
N->getValueType(0).getVectorNumElements());
- SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
- ResVT, WideSETCC,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+ SDValue CC = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
return PromoteTargetBoolean(CC, N->getValueType(0));
}
@@ -3159,8 +3172,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
}
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
- DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy()));
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
@@ -3407,9 +3421,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
if (NewVT.isVector()) {
unsigned NumVTElts = NewVT.getVectorNumElements();
do {
- SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy()));
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
@@ -3429,8 +3443,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
// Readjust index position based on new vector type
Idx = Idx * ValEltWidth / NewVTWidth;
do {
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
- DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy()));
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getConstant(Idx++, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
@@ -3476,8 +3492,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
EVT ValEltVT = ValVT.getVectorElementType();
unsigned Increment = ValEltVT.getSizeInBits() / 8;
unsigned NumElts = StVT.getVectorNumElements();
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align,
@@ -3488,8 +3505,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
BasePtr,
DAG.getConstant(Offset, dl,
BasePtr.getValueType()));
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
ST->getPointerInfo().getWithOffset(Offset),
StEltVT, isVolatile, isNonTemporal,
@@ -3525,8 +3543,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
}
if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
@@ -3534,8 +3553,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
unsigned Idx;
for (Idx = 0; Idx < MinNumElts; ++Idx)
- Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
+ Ops[Idx] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
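
Every hunk in the LegalizeVectorTypes.cpp changes above follows the same mechanical pattern: the DataLayout is no longer fetched through TLI.getDataLayout(), but taken from the SelectionDAG and passed explicitly to the layout-dependent TargetLowering queries (getVectorIdxTy, getPointerTy, getSetCCResultType, preferred-alignment lookups). Below is a minimal sketch of the new calling convention, not part of the patch; the helper name is hypothetical and the header paths are those of this revision of the tree.

// Illustrative helper (hypothetical), not part of this commit.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static SDValue extractElt0(SelectionDAG &DAG, SDLoc dl, EVT EltVT, SDValue Vec) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // DataLayout now comes from the DAG and is threaded into the TLI query
  // (previously: TLI.getVectorIdxTy() with TLI caching the DataLayout).
  const DataLayout &DL = DAG.getDataLayout();
  SDValue Idx0 = DAG.getConstant(0, dl, TLI.getVectorIdxTy(DL));
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, Idx0);
}
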
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 00cbae3986cd..34e1a7001082 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -725,9 +725,8 @@ void ScheduleDAGLinearize::Schedule() {
SmallVector<SDNode*, 8> Glues;
unsigned DAGSize = 0;
- for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); I != E; ++I) {
- SDNode *N = I;
+ for (SDNode &Node : DAG->allnodes()) {
+ SDNode *N = &Node;
// Use node id to record degree.
unsigned Degree = N->use_size();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b22d6edd85af..2a6c853a1d11 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -289,9 +289,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
- for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); NI != E; ++NI) {
- SDNode *Node = &*NI;
+ for (SDNode &NI : DAG->allnodes()) {
+ SDNode *Node = &NI;
if (!Node || !Node->isMachineOpcode())
continue;
@@ -308,9 +307,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// to their associated SUnits by holding SUnits table indices. A value
// of -1 means the SDNode does not yet have an associated SUnit.
unsigned NumNodes = 0;
- for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); NI != E; ++NI) {
- NI->setNodeId(-1);
+ for (SDNode &NI : DAG->allnodes()) {
+ NI.setNodeId(-1);
++NumNodes;
}
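
The two scheduler hunks above (ScheduleDAGFast.cpp and ScheduleDAGSDNodes.cpp) replace explicit allnodes_iterator loops with range-based iteration over SelectionDAG::allnodes(), which yields SDNode&. A minimal sketch of the idiom, not part of the patch; the counting helper is hypothetical.

// Illustrative helper (hypothetical), not part of this commit.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static unsigned countMachineOpcodeNodes(SelectionDAG &DAG) {
  unsigned Count = 0;
  // Range-based form of the old allnodes_begin()/allnodes_end() loop.
  for (SDNode &Node : DAG.allnodes())
    if (Node.isMachineOpcode())
      ++Count;
  return Count;
}
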
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index be5478275f99..14f44ccc60ce 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -151,8 +151,8 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
bool IsAllUndef = true;
- for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF)
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.getOpcode() == ISD::UNDEF)
continue;
IsAllUndef = false;
// Do not accept build_vectors that aren't all constants or which have non-0
@@ -163,12 +163,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// We only want to check enough bits to cover the vector elements, because
// we care if the resultant vector is all zeros, not whether the individual
// constants are.
- SDValue Zero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
if (CN->getAPIntValue().countTrailingZeros() < EltSize)
return false;
- } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
return false;
} else
@@ -921,7 +920,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
- return TLI->getDataLayout()->getABITypeAlignment(Ty);
+ return getDataLayout().getABITypeAlignment(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
@@ -1184,7 +1183,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT,
// EltParts is currently in little endian order. If we actually want
// big-endian order then reverse it now.
- if (TLI->isBigEndian())
+ if (getDataLayout().isBigEndian())
std::reverse(EltParts.begin(), EltParts.end());
// The elements must be reversed when the element order is different
@@ -1234,7 +1233,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT,
}
SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) {
- return getConstant(Val, DL, TLI->getPointerTy(), isTarget);
+ return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
}
SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT,
@@ -1303,7 +1302,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
"Cannot set target flags on target-independent globals");
// Truncate (with sign-extension) the offset value to the pointer size.
- unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType());
+ unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
if (BitWidth < 64)
Offset = SignExtend64(Offset, BitWidth);
@@ -1373,7 +1372,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType());
+ Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1400,7 +1399,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType());
+ Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1850,7 +1849,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- EVT ShTy = TLI->getShiftAmountTy(LHSTy);
+ EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1864,10 +1863,10 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
- std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
+ std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
- return getFrameIndex(FrameIdx, TLI->getPointerTy());
+ return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
/// CreateStackTemporary - Create a stack temporary suitable for holding
@@ -1877,13 +1876,13 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
VT2.getStoreSizeInBits())/8;
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
- const DataLayout *TD = TLI->getDataLayout();
- unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
- TD->getPrefTypeAlignment(Ty2));
+ const DataLayout &DL = getDataLayout();
+ unsigned Align =
+ std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
- return getFrameIndex(FrameIdx, TLI->getPointerTy());
+ return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
@@ -1916,9 +1915,9 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
break;
}
- if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
const APInt &C2 = N2C->getAPIntValue();
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &C1 = N1C->getAPIntValue();
switch (Cond) {
@@ -1936,8 +1935,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
}
}
}
- if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
- if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) {
APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
switch (Cond) {
default: break;
@@ -2356,15 +2355,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
+ // Output known-0 bits are also known if the top bits of each input are
+ // known to be clear. For example, if one input has the top 10 bits clear
+ // and the other has the top 8 bits clear, we know the top 7 bits of the
+ // output must be clear.
computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
- unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+ unsigned KnownZeroHigh = KnownZero2.countLeadingOnes();
+ unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
- KnownZeroOut = std::min(KnownZeroOut,
+ KnownZeroHigh = std::min(KnownZeroHigh,
+ KnownZero2.countLeadingOnes());
+ KnownZeroLow = std::min(KnownZeroLow,
KnownZero2.countTrailingOnes());
if (Op.getOpcode() == ISD::ADD) {
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
+ if (KnownZeroHigh > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
break;
}
@@ -2372,8 +2380,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// information if we know (at least) that the low two bits are clear. We
// then return to the caller that the low bit is unknown but that other bits
// are known zero.
- if (KnownZeroOut >= 2) // ADDE
- KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
+ if (KnownZeroLow >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow);
break;
}
case ISD::SREM:
@@ -2814,7 +2822,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
// doesn't create new constants with different values. Nevertheless, the
// opaque flag is preserved during folding to prevent future folding with
// other constants.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) {
const APInt &Val = C->getAPIntValue();
switch (Opcode) {
default: break;
@@ -2861,7 +2869,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
}
// Constant fold unary operations with a floating point constant operand.
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) {
APFloat V = C->getValueAPF(); // make copy
switch (Opcode) {
case ISD::FNEG:
@@ -2922,7 +2930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
}
// Constant fold unary operations with a vector integer or float operand.
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
if (BV->isConstant()) {
switch (Opcode) {
default:
@@ -3278,8 +3286,8 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue N2, const SDNodeFlags *Flags) {
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
switch (Opcode) {
default: break;
case ISD::TokenFactor:
@@ -3499,7 +3507,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
Ops.push_back(Op);
continue;
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = C->getAPIntValue();
Ops.push_back(SignExtendInReg(Val));
continue;
@@ -3554,7 +3562,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
// if the indices are known different, extract the element from
// the original vector.
SDValue N1Op2 = N1.getOperand(2);
- ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);
if (N1Op2C && N2C) {
if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
@@ -3600,9 +3608,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
- if (isa<ConstantSDNode>(Index.getNode())) {
+ if (isa<ConstantSDNode>(Index)) {
assert((VT.getVectorNumElements() +
- cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ cast<ConstantSDNode>(Index)->getZExtValue()
<= N1.getValueType().getVectorNumElements())
&& "Extract subvector overflow!");
}
@@ -3628,8 +3636,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
// Constant fold FP operations.
bool HasFPExceptions = TLI->hasFloatingPointExceptions();
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
- ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
if (N1CFP) {
if (!N2CFP && isCommutativeBinOp(Opcode)) {
// Canonicalize constant to RHS if commutative.
@@ -3787,7 +3795,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
switch (Opcode) {
case ISD::FMA: {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
@@ -3845,9 +3853,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
"Dest and insert subvector source types must match!");
assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
"Insert subvector must be from smaller vector to larger vector!");
- if (isa<ConstantSDNode>(Index.getNode())) {
+ if (isa<ConstantSDNode>(Index)) {
assert((N2.getValueType().getVectorNumElements() +
- cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ cast<ConstantSDNode>(Index)->getZExtValue()
<= VT.getVectorNumElements())
&& "Insert subvector overflow!");
}
@@ -3994,7 +4002,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
APInt Val(NumVTBits, 0);
- if (TLI.isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Str[i] << i*8;
} else {
@@ -4066,9 +4074,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT == MVT::Other) {
unsigned AS = 0;
- if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
+ if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(AS) ||
TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) {
- VT = TLI.getPointerTy();
+ VT = TLI.getPointerTy(DAG.getDataLayout());
} else {
switch (DstAlign & 7) {
case 0: VT = MVT::i64; break;
@@ -4185,14 +4193,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Align &&
- TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+ while (NewAlign > Align &&
+ DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
@@ -4294,7 +4302,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -4385,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -4488,19 +4496,21 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext());
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
- TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult()
- .setTailCall(isTailCall);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult()
+ .setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
@@ -4544,19 +4554,21 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext());
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
- Type::getVoidTy(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult()
- .setTailCall(isTailCall);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult()
+ .setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
@@ -4594,7 +4606,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
}
// Emit a library call.
- Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
+ Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
@@ -4608,13 +4620,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Type::getVoidTy(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult()
- .setTailCall(isTailCall);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult()
+ .setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
@@ -6656,7 +6670,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
/// isOnlyUserOf - Return true if this node is the only use of N.
///
-bool SDNode::isOnlyUserOf(SDNode *N) const {
+bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
@@ -6671,16 +6685,16 @@ bool SDNode::isOnlyUserOf(SDNode *N) const {
/// isOperand - Return true if this node is an operand of N.
///
-bool SDValue::isOperandOf(SDNode *N) const {
+bool SDValue::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (*this == Op)
return true;
return false;
}
-bool SDNode::isOperandOf(SDNode *N) const {
- for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
- if (this == N->OperandList[i].getNode())
+bool SDNode::isOperandOf(const SDNode *N) const {
+ for (const SDValue &Op : N->op_values())
+ if (this == Op.getNode())
return true;
return false;
}
@@ -6784,10 +6798,9 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
EVT OperandEltVT = OperandVT.getVectorElementType();
- Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- OperandEltVT,
- Operand,
- getConstant(i, dl, TLI->getVectorIdxTy()));
+ Operands[j] =
+ getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+ getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout())));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
@@ -6891,10 +6904,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
+ unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
- *TLI->getDataLayout());
+ getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
@@ -6950,10 +6963,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
"More vector elements requested than available!");
SDValue Lo, Hi;
Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
- getConstant(0, DL, TLI->getVectorIdxTy()));
+ getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
getConstant(LoVT.getVectorNumElements(), DL,
- TLI->getVectorIdxTy()));
+ TLI->getVectorIdxTy(getDataLayout())));
return std::make_pair(Lo, Hi);
}
@@ -6965,7 +6978,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op,
Count = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
- EVT IdxTy = TLI->getVectorIdxTy();
+ EVT IdxTy = TLI->getVectorIdxTy(getDataLayout());
SDLoc SL(Op);
for (unsigned i = Start, e = Start + Count; i != e; ++i) {
Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
@@ -7080,14 +7093,12 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
- return dyn_cast_or_null<ConstantSDNode>(
- getSplatValue(UndefElements).getNode());
+ return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
}
ConstantFPSDNode *
BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
- return dyn_cast_or_null<ConstantFPSDNode>(
- getSplatValue(UndefElements).getNode());
+ return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
bool BuildVectorSDNode::isConstant() const {
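
The computeKnownBits hunk for ISD::ADD above starts tracking known-zero high bits as well: if one operand has its top HighA bits known zero and the other its top HighB bits known zero, the carry can reach at most one bit into the cleared region, so min(HighA, HighB) - 1 high bits of the sum are known zero. A small self-contained check of that arithmetic, not part of the patch; the function name is only for illustration.

// Illustrative check (hypothetical helper), not part of this commit.
#include <algorithm>
#include <cassert>
#include <cstdint>

static unsigned knownZeroHighBitsOfAdd(unsigned HighA, unsigned HighB) {
  // Mirrors the rule in the hunk above: KnownZeroHigh - 1, guarded for > 1.
  unsigned KnownZeroHigh = std::min(HighA, HighB);
  return KnownZeroHigh > 1 ? KnownZeroHigh - 1 : 0;
}

int main() {
  // Example from the comment added in the patch: tops of 10 and 8 bits
  // clear leave the top 7 bits of the 32-bit sum clear.
  assert(knownZeroHighBitsOfAdd(10, 8) == 7);
  // Worst case: A <= 2^22 - 1, B <= 2^24 - 1, so A + B < 2^25 and
  // bits 25..31 (7 bits) of a 32-bit result are zero.
  uint64_t MaxSum = ((1ULL << 22) - 1) + ((1ULL << 24) - 1);
  assert((MaxSum >> 25) == 0);
  return 0;
}
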
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4897082f89aa..2c3c0eb101a0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -146,7 +146,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
@@ -160,13 +160,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
// Combine the round and odd parts.
Lo = Val;
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
- Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
- DAG.getConstant(Lo.getValueType().getSizeInBits(), DL,
- TLI.getPointerTy()));
+ Hi =
+ DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(), DL,
+ TLI.getPointerTy(DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
@@ -177,7 +178,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
- if (TLI.hasBigEndianPartOrdering(ValueVT))
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
@@ -211,8 +212,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
- return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
- DAG.getTargetConstant(1, DL, TLI.getPointerTy()));
+ return DAG.getNode(
+ ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
@@ -305,8 +307,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
"Cannot narrow, it would be a lossy transformation");
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
// Vector/Vector bitcast.
@@ -362,10 +365,10 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
- assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
+ "Copying to an illegal type!");
if (NumParts == 0)
return;
@@ -433,7 +436,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
DAG.getIntPtrConstant(RoundBits, DL));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
std::reverse(Parts + RoundParts, Parts + NumParts);
@@ -468,7 +471,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
}
}
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts, Parts + OrigNumParts);
}
@@ -497,9 +500,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
// undef elements.
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
- Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- ElementVT, Val, DAG.getConstant(i, DL,
- TLI.getVectorIdxTy())));
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
for (unsigned i = ValueVT.getVectorNumElements(),
e = PartVT.getVectorNumElements(); i != e; ++i)
@@ -524,9 +527,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
// Vector -> scalar conversion.
assert(ValueVT.getVectorNumElements() == 1 &&
"Only trivial vector-to-scalar conversions should get here!");
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- PartVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
+ Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
bool Smaller = ValueVT.bitsLE(PartVT);
Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
@@ -554,14 +557,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector())
- Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
- IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates), DL,
- TLI.getVectorIdxTy()));
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getConstant(i * (NumElements / NumIntermediates), DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
else
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- IntermediateVT, Val,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy()));
+ Ops[i] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
// Split the intermediate operands into legal parts.
@@ -588,14 +591,14 @@ RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
EVT valuevt)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
-RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli,
- unsigned Reg, Type *Ty) {
- ComputeValueVTs(tli, Ty, ValueVTs);
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty) {
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs);
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
- MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
@@ -796,7 +799,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
// If we clobbered the stack pointer, MFI should know about it.
assert(DAG.getMachineFunction().getFrameInfo()->
- hasInlineAsmWithSPAdjust());
+ hasOpaqueSPAdjustment());
}
}
}
@@ -807,7 +810,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- DL = DAG.getTarget().getDataLayout();
+ DL = &DAG.getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -964,8 +967,8 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
if (It != FuncInfo.ValueMap.end()) {
unsigned InReg = It->second;
- RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
- Ty);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), InReg, Ty);
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
resolveDanglingDebugInfo(V, Result);
@@ -1031,7 +1034,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const Constant *C = dyn_cast<Constant>(V)) {
- EVT VT = TLI.getValueType(V->getType(), true);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, getCurSDLoc(), VT);
@@ -1041,7 +1044,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
- return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS));
+ return DAG.getConstant(0, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout(), AS));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
@@ -1095,7 +1099,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
"Unknown struct or array constant!");
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
@@ -1127,7 +1131,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
- EVT EltVT = TLI.getValueType(VecTy->getElementType());
+ EVT EltVT =
+ TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
@@ -1147,13 +1152,15 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
- return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ return DAG.getFrameIndex(SI->second,
+ TLI.getPointerTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
- RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+ RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
+ Inst->getType());
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -1163,6 +1170,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto &DL = DAG.getDataLayout();
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
@@ -1175,7 +1183,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
- ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
PtrValueVTs);
SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
@@ -1183,7 +1191,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
@@ -1203,7 +1211,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
@@ -1692,7 +1700,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
- EVT PTy = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
@@ -1723,9 +1731,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// This value may be smaller or larger than the target's pointer type, and
// therefore require extension or truncating.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy());
+ SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
+ unsigned JumpTableReg =
+ FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
@@ -1733,11 +1742,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the largest
// case in the switch.
- SDValue CMP =
- DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT),
- ISD::SETUGT);
+ SDValue CMP = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, CMP,
@@ -1762,7 +1770,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT PtrTy = TLI.getPointerTy();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI->getStackProtectorIndex();
@@ -1771,8 +1779,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDValue GuardPtr = getValue(IRGuard);
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
- unsigned Align =
- TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
+ unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType());
SDValue Guard;
SDLoc dl = getCurSDLoc();
@@ -1799,10 +1806,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
EVT VT = Guard.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
- SDValue Cmp =
- DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
+ SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
// If the sub is not 0, then we know the guard/stackslot do not equal, so
// branch to failure MBB.
@@ -1848,10 +1855,10 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
// Check range
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue RangeCmp =
- DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
+ SDValue RangeCmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
// Determine the type of the test operands.
bool UsePtrType = false;
@@ -1867,7 +1874,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
}
}
if (UsePtrType) {
- VT = TLI.getPointerTy();
+ VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
}
@@ -1909,13 +1916,15 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(
- dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
- DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ);
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
+ ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
- dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
- DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE);
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
+ ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
@@ -1924,8 +1933,9 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, dl,
VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
- Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp,
- DAG.getConstant(0, dl, VT), ISD::SETNE);
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
// The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
@@ -2013,7 +2023,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
SmallVector<EVT, 2> ValueVTs;
SDLoc dl = getCurSDLoc();
- ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
@@ -2022,14 +2032,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
if (FuncInfo.ExceptionPointerVirtReg) {
Ops[0] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
+ FuncInfo.ExceptionPointerVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[0]);
} else {
- Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy());
+ Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
}
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()),
+ FuncInfo.ExceptionSelectorVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[1]);
// Merge into one.
@@ -2038,28 +2050,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
setValue(&LP, Res);
}
-unsigned
-SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV,
- MachineBasicBlock *LPadBB) {
- SDValue Chain = getControlRoot();
- SDLoc dl = getCurSDLoc();
-
- // Get the typeid that we will dispatch on later.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
- unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
- unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV);
- SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel);
-
- // Branch to the main landing pad block.
- MachineBasicBlock *ClauseMBB = FuncInfo.MBB;
- ClauseMBB->addSuccessor(LPadBB);
- DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain,
- DAG.getBasicBlock(LPadBB)));
- return VReg;
-}
-
void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
#ifndef NDEBUG
for (const CaseCluster &CC : Clusters)
@@ -2186,8 +2176,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- EVT ShiftTy =
- DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType());
+ EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
+ Op2.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -2256,7 +2246,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
@@ -2271,13 +2262,15 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
ISD::CondCode Condition = getFCmpCondCode(predicate);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+ ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
@@ -2336,7 +2329,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
@@ -2344,7 +2338,8 @@ void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
@@ -2352,7 +2347,8 @@ void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for the same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
@@ -2361,43 +2357,49 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
- DAG.getTargetConstant(0, dl, TLI.getPointerTy())));
+ DAG.getTargetConstant(
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
@@ -2405,7 +2407,8 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
}
@@ -2413,14 +2416,16 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
// BitCast assures us that source and destination are the same size so this is
// either a BITCAST or a no-op.
@@ -2442,7 +2447,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
SDValue N = getValue(SV);
- EVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
@@ -2457,19 +2462,21 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)),
- getCurSDLoc(), TLI.getVectorIdxTy());
+ SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
- TLI.getValueType(I.getType()), InVec, InVal, InIdx));
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)),
- getCurSDLoc(), TLI.getVectorIdxTy());
+ SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
- TLI.getValueType(I.getType()), InVec, InIdx));
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InIdx));
}
// Utility for visitShuffleVector - Return true if every element in Mask,
@@ -2492,7 +2499,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
unsigned MaskNumElts = Mask.size();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = TLI.getValueType(I.getType());
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2614,7 +2621,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDLoc dl = getCurSDLoc();
Src = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, Src,
- DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy()));
+ DAG.getConstant(StartIdx[Input], dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
}
}
@@ -2641,7 +2649,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// replacing the shuffle with extracts and a build vector.
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = TLI.getVectorIdxTy();
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SDLoc dl = getCurSDLoc();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
@@ -2676,9 +2684,9 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
- ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
SmallVector<EVT, 4> ValValueVTs;
- ComputeValueVTs(TLI, ValTy, ValValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
unsigned NumValValues = ValValueVTs.size();
@@ -2722,7 +2730,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
- ComputeValueVTs(TLI, ValTy, ValValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
@@ -2755,6 +2763,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
+ // Normalize Vector GEP - all scalar operands should be converted to the
+ // splat vector.
+ unsigned VectorWidth = I.getType()->isVectorTy() ?
+ cast<VectorType>(I.getType())->getVectorNumElements() : 0;
+
+ if (VectorWidth && !N.getValueType().isVector()) {
+ MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth);
+ SmallVector<SDValue, 16> Ops(VectorWidth, N);
+ N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ }
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
const Value *Idx = *OI;
@@ -2770,16 +2788,25 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
- MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS);
+ MVT PtrTy =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
unsigned PtrSize = PtrTy.getSizeInBits();
APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
- // If this is a constant subscript, handle it quickly.
- if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ // If this is a scalar constant or a splat vector of constants,
+ // handle it quickly.
+ const auto *CI = dyn_cast<ConstantInt>(Idx);
+ if (!CI && isa<ConstantDataVector>(Idx) &&
+ cast<ConstantDataVector>(Idx)->getSplatValue())
+ CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
+
+ if (CI) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
- SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy);
+ SDValue OffsVal = VectorWidth ?
+ DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
+ DAG.getConstant(Offs, dl, PtrTy);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
continue;
}
@@ -2787,6 +2814,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// N = N + Idx * ElementSize;
SDValue IdxN = getValue(Idx);
+ if (!IdxN.getValueType().isVector() && VectorWidth) {
+ MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+ SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
+ IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ }
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
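
(Illustrative only, not part of the patch.) The vector-GEP handling added above splats scalar base and index operands across all lanes and then accumulates per-lane offsets; a rough standalone C++ model with invented names:

```cpp
#include <array>
#include <cstdint>

// Broadcast a scalar operand to all lanes, mirroring the BUILD_VECTOR splat.
template <size_t N>
std::array<uint64_t, N> splat(uint64_t V) {
  std::array<uint64_t, N> R;
  R.fill(V);
  return R;
}

// One GEP step over elements of ElementSize bytes: each lane advances
// independently by Index[Lane] * ElementSize.
template <size_t N>
std::array<uint64_t, N> gepStep(std::array<uint64_t, N> Base,
                                std::array<int64_t, N> Index,
                                uint64_t ElementSize) {
  for (size_t Lane = 0; Lane < N; ++Lane)
    Base[Lane] += static_cast<uint64_t>(Index[Lane]) * ElementSize;
  return Base;
}
```
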
@@ -2823,14 +2855,14 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDLoc dl = getCurSDLoc();
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
- I.getAlignment());
+ std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI.getPointerTy();
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
@@ -2898,7 +2930,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -2975,8 +3007,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(),
- ValueVTs, &Offsets);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3077,9 +3109,10 @@ static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
else if (SDB->findValue(ShuffleInst)) {
SDValue ShuffleNode = SDB->getValue(ShuffleInst);
SDLoc sdl = ShuffleNode;
- Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl,
- ShuffleNode.getValueType().getScalarType(), ShuffleNode,
- DAG.getConstant(0, sdl, TLI.getVectorIdxTy()));
+ Base = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, sdl,
+ ShuffleNode.getValueType().getScalarType(), ShuffleNode,
+ DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDB->setValue(Ptr, Base);
}
else
@@ -3126,7 +3159,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);
if (!UniformBase) {
- Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy());
+ Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
}
SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
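
(Illustrative only, not part of the patch.) The scatter operands assembled here follow the usual Base plus per-lane Index addressing model; when getUniformBase cannot prove a single shared base, Base becomes zero and Index carries the full per-lane pointers. A simplified scalar model, with invented names and Index taken as byte offsets:

```cpp
#include <array>
#include <cstdint>
#include <cstring>

// Each enabled lane stores Src[i] to the address Base + Index[i].
template <size_t N>
void maskedScatter(const std::array<float, N> &Src,
                   const std::array<bool, N> &Mask, uint8_t *Base,
                   const std::array<uint64_t, N> &Index) {
  for (size_t i = 0; i < N; ++i)
    if (Mask[i])
      std::memcpy(Base + Index[i], &Src[i], sizeof(float));
}
```
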
@@ -3146,7 +3179,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
SDValue Mask = getValue(I.getArgOperand(2));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = TLI.getValueType(I.getType());
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
if (!Alignment)
Alignment = DAG.getEVTAlignment(VT);
@@ -3184,7 +3217,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Mask = getValue(I.getArgOperand(2));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = TLI.getValueType(I.getType());
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
if (!Alignment)
Alignment = DAG.getEVTAlignment(VT);
@@ -3214,7 +3247,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
Alignment, AAInfo, Ranges);
if (!UniformBase) {
- Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy());
+ Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index };
@@ -3291,8 +3324,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
- Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy());
- Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy());
+ Ops[1] = DAG.getConstant(I.getOrdering(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -3304,7 +3339,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = TLI.getValueType(I.getType());
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
@@ -3339,7 +3374,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = TLI.getValueType(I.getValueOperand()->getType());
+ EVT VT =
+ TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
@@ -3382,7 +3418,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
@@ -3391,7 +3427,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -3425,7 +3461,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (!I.getType()->isVoidTy()) {
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
- EVT VT = TLI.getValueType(PTy);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
}
@@ -3458,8 +3494,9 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
SDLoc dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
- SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
- DAG.getConstant(23, dl, TLI.getPointerTy()));
+ SDValue t1 = DAG.getNode(
+ ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
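
(Illustrative only, not part of the patch.) GetExponent above leans on the IEEE-754 single-precision layout: mask the exponent field, shift it down by 23, and subtract the bias of 127. The reverse trick, shifting an integer into the exponent field, yields an exact power of two and is what the IntegerPartOfX shift in getLimitedPrecisionExp2 below exploits. A standalone sketch:

```cpp
#include <cstdint>
#include <cstring>

// Unbiased exponent of a normal float: (bits & 0x7f800000) >> 23, minus 127.
int getExponent(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  return static_cast<int>((Bits & 0x7f800000u) >> 23) - 127;
}

// Exact 2^N for N in [-126, 127], built by placing N in the exponent field.
float exp2Int(int N) {
  uint32_t Bits = static_cast<uint32_t>(N + 127) << 23;
  float R;
  std::memcpy(&R, &Bits, sizeof(R));
  return R;
}
```
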
@@ -3484,7 +3521,8 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
// IntegerPartOfX <<= 23;
IntegerPartOfX = DAG.getNode(
ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy()));
+ DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
+ DAG.getDataLayout())));
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
@@ -4071,11 +4109,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::vaend: visitVAEnd(I); return nullptr;
case Intrinsic::vacopy: visitVACopy(I); return nullptr;
case Intrinsic::returnaddress:
- setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::frameaddress:
- setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::read_register: {
@@ -4083,7 +4123,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
- EVT VT = TLI.getValueType(I.getType());
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::READ_REGISTER, sdl,
DAG.getVTList(VT, MVT::Other), Chain, RegName);
setValue(&I, Res);
@@ -4335,14 +4375,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::eh_dwarf_cfa: {
SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
- TLI.getPointerTy());
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue Offset = DAG.getNode(ISD::ADD, sdl,
CfaArg.getValueType(),
DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
CfaArg.getValueType()),
CfaArg);
- SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(),
- DAG.getConstant(0, sdl, TLI.getPointerTy()));
+ SDValue FA = DAG.getNode(
+ ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()),
+ DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
FA, Offset));
return nullptr;
@@ -4444,7 +4485,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
- EVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
@@ -4474,7 +4515,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertus: Code = ISD::CVT_US; break;
case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
}
- EVT DestVT = TLI.getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Op1 = I.getArgOperand(0);
Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1),
DAG.getValueType(DestVT),
@@ -4564,7 +4605,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(2))));
return nullptr;
case Intrinsic::fmuladd: {
- EVT VT = TLI.getValueType(I.getType());
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
@@ -4593,10 +4634,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MVT::i32))));
return nullptr;
case Intrinsic::convert_from_fp16:
- setValue(&I,
- DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()),
- DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
- getValue(I.getArgOperand(0)))));
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)))));
return nullptr;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
@@ -4640,8 +4681,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
- Res = DAG.getNode(ISD::STACKSAVE, sdl,
- DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op);
+ Res = DAG.getNode(
+ ISD::STACKSAVE, sdl,
+ DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return nullptr;
@@ -4655,7 +4697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- EVT PtrTy = TLI.getPointerTy();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Src, Chain = getRoot();
const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand();
const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
@@ -4753,7 +4795,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::adjust_trampoline: {
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
- TLI.getPointerTy(),
+ TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
}
@@ -4794,10 +4836,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TargetLowering::ArgListTy Args;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot())
- .setCallee(CallingConv::C, I.getType(),
- DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
- std::move(Args), 0);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
+ CallingConv::C, I.getType(),
+ DAG.getExternalSymbol(TrapFuncName.data(),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
@@ -4873,7 +4916,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[2];
Ops[0] = getRoot();
- Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
+ Ops[1] =
+ DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
@@ -4883,7 +4927,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::invariant_start:
// Discard region information.
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
return nullptr;
case Intrinsic::invariant_end:
// Discard region information.
@@ -4903,7 +4947,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::clear_cache:
return TLI.getClearCacheBuiltinName();
case Intrinsic::eh_actions:
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
return nullptr;
case Intrinsic::donothing:
// ignore
@@ -4935,11 +4979,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
- case Intrinsic::frameescape: {
+ case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
@@ -4953,7 +4997,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::getRealLinkageName(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
- TII->get(TargetOpcode::FRAME_ALLOC))
+ TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
.addFrameIndex(FI);
}
@@ -4961,10 +5005,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
- case Intrinsic::framerecover: {
- // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx)
+ case Intrinsic::localrecover: {
+ // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
- MVT PtrVT = TLI.getPointerTy(0);
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
@@ -4978,7 +5022,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// that would make this PC relative.
SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
SDValue OffsetVal =
- DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym);
+ DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
// Add the offset to the FP.
Value *FP = I.getArgOperand(1);
@@ -4994,7 +5038,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_exceptioncode: {
unsigned Reg = TLI.getExceptionPointerRegister();
assert(Reg && "cannot get exception code on this platform");
- MVT PtrVT = TLI.getPointerTy();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad");
unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
@@ -5178,7 +5222,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
- EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true);
+ EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
if (IsSigned)
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
else
@@ -5203,7 +5248,8 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
- EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true);
+ EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
return true;
}
@@ -5640,8 +5686,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (!RenameFn)
Callee = getValue(I.getCalledValue());
else
- Callee = DAG.getExternalSymbol(RenameFn,
- DAG.getTargetLoweringInfo().getPointerTy());
+ Callee = DAG.getExternalSymbol(
+ RenameFn,
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
// Check if we can potentially perform a tail call. More detailed checking
// will be done within LowerCallTo, after more information about the call is known.
@@ -5670,13 +5717,12 @@ public:
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
- EVT getCallOperandValEVT(LLVMContext &Context,
- const TargetLowering &TLI,
- const DataLayout *DL) const {
+ EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL) const {
if (!CallOperandVal) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
- return TLI.getPointerTy();
+ return TLI.getPointerTy(DL);
llvm::Type *OpTy = CallOperandVal->getType();
@@ -5698,7 +5744,7 @@ public:
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = DL->getTypeSizeInBits(OpTy);
+ unsigned BitSize = DL.getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
@@ -5712,7 +5758,7 @@ public:
}
}
- return TLI.getValueType(OpTy, true);
+ return TLI.getValueType(DL, OpTy, true);
}
};
@@ -5838,8 +5884,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfoVector ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS);
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
+ DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
bool hasMemory = false;
@@ -5864,10 +5910,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo));
+ OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
+ STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI.getSimpleValueType(CS.getType());
+ OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
}
++ResNo;
break;
@@ -5888,8 +5935,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT =
- OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT();
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
+ DAG.getDataLayout()).getSimpleVT();
}
OpInfo.ConstraintVT = OpVT;
@@ -5977,17 +6024,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
- OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
- TLI.getPointerTy());
+ OpInfo.CallOperand = DAG.getConstantPool(
+ cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
} else {
// Otherwise, create a stack slot and emit a store to it before the
// asm.
Type *Ty = OpVal->getType();
- uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
- unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ unsigned Align = DL.getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ SDValue StackSlot =
+ DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, getCurSDLoc(),
OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
@@ -6022,9 +6071,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
- AsmNodeOperands.push_back(
- DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
- TLI.getPointerTy()));
+ AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
+ IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
@@ -6064,8 +6112,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
}
- AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(),
- TLI.getPointerTy()));
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -6201,8 +6249,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
- AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(),
- TLI.getPointerTy()));
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
@@ -6227,16 +6275,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
- AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- getCurSDLoc(),
- TLI.getPointerTy()));
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
- assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ assert(InOperandVal.getValueType() ==
+ TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
unsigned ConstraintID =
@@ -6314,7 +6362,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
- EVT ResultType = TLI.getValueType(CS.getType());
+ EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
// If any of the results of the inline asm is a vector, it may have the
// wrong width/num elts. This can happen for register classes that can
@@ -6380,9 +6428,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const DataLayout &DL = *TLI.getDataLayout();
- SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(),
- getRoot(), getValue(I.getOperand(0)),
+ const DataLayout &DL = DAG.getDataLayout();
+ SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)),
DL.getABITypeAlignment(I.getType()));
setValue(&I, V);
@@ -6473,8 +6521,8 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
- Ops.push_back(
- Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(
+ FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
} else
Ops.push_back(OpVal);
}
@@ -6654,7 +6702,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
- ComputeValueVTs(TLI, CS->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
@@ -6718,10 +6766,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets);
+ auto &DL = CLI.DAG.getDataLayout();
+ ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this);
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
bool CanLowerReturn =
this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
@@ -6733,13 +6782,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// FIXME: equivalent assert?
// assert(!CS.hasInAllocaArgument() &&
// "sret demotion is incompatible with inalloca");
- uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy);
- unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy);
+ uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
+ unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
- DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy());
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
@@ -6784,7 +6833,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListTy &Args = CLI.getArgs();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
Type *FinalType = Args[i].Ty;
if (Args[i].isByVal)
FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
@@ -6797,7 +6846,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
if (Args[i].isZExt)
Flags.setZExt();
@@ -6821,14 +6870,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (Args[i].isByVal || Args[i].isInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should come from FE. BE will guess if this
// info is not there but there are cases it cannot get right.
unsigned FrameAlign;
if (Args[i].Alignment)
FrameAlign = Args[i].Alignment;
else
- FrameAlign = getByValTypeAlignment(ElementTy);
+ FrameAlign = getByValTypeAlignment(ElementTy, DL);
Flags.setByValAlign(FrameAlign);
}
if (Args[i].isNest)
@@ -6923,7 +6972,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<EVT, 1> PVTs;
Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
- ComputeValueVTs(*this, PtrRetTy, PVTs);
+ ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
@@ -6997,7 +7046,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType());
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -7030,13 +7080,14 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
- const DataLayout *DL = TLI->getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ PointerType::getUnqual(F.getReturnType()), ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
// or one register.
@@ -7053,7 +7104,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; ++I, ++Idx) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, I->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
bool isArgValueUsed = !I->use_empty();
unsigned PartBase = 0;
Type *FinalType = I->getType();
@@ -7066,7 +7117,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy);
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
Flags.setZExt();
@@ -7090,14 +7141,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(DL->getTypeAllocSize(ElementTy));
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
unsigned FrameAlign;
if (F.getParamAlignment(Idx))
FrameAlign = F.getParamAlignment(Idx);
else
- FrameAlign = TLI->getByValTypeAlignment(ElementTy);
+ FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
Flags.setByValAlign(FrameAlign);
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
@@ -7153,7 +7204,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ PointerType::getUnqual(F.getReturnType()), ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
@@ -7177,7 +7229,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
++I, ++Idx) {
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, I->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
// If this argument is unused then remember its value. It is used to generate
@@ -7324,7 +7376,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
@@ -7595,7 +7647,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
// FIXME: Using the pointer type doesn't seem ideal.
- uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
+ uint64_t BW = DAG.getDataLayout().getPointerSizeInBits();
uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
return Range <= BW;
}
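
(Illustrative only, not part of the patch.) rangeFitsInWord, now sized from the DataLayout pointer width, only checks that the span of case values fits in one word-sized bit mask; a trimmed sketch that ignores the overflow clamping of the real code:

```cpp
#include <cstdint>

// True if all case values in [Low, High] can be encoded in a single
// word-sized bit mask (assumes High >= Low and no overflow).
bool rangeFitsInWord(uint64_t Low, uint64_t High, unsigned PointerBits) {
  uint64_t Range = High - Low + 1;
  return Range <= PointerBits;
}
```
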
@@ -7650,8 +7702,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
APInt LowBound;
APInt CmpRange;
- const int BitWidth =
- DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
+ const int BitWidth = DAG.getTargetLoweringInfo()
+ .getPointerTy(DAG.getDataLayout())
+ .getSizeInBits();
assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
if (Low.isNonNegative() && High.slt(BitWidth)) {
@@ -7731,7 +7784,7 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
// If target does not have legal shift left, do not emit bit tests at all.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT PTy = TLI.getPointerTy();
+ EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
if (!TLI.isOperationLegal(ISD::SHL, PTy))
return;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f225d54d189d..700675453fe7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -755,8 +755,6 @@ public:
void visitJumpTable(JumpTable &JT);
void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
- unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
- MachineBasicBlock *LPadMBB);
private:
// These all get lowered before this pass.
@@ -915,8 +913,8 @@ struct RegsForValue {
RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
- RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg,
- Type *Ty);
+ RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty);
/// append - Add the specified values to this one.
void append(const RegsForValue &RHS) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index ef468a2b1c54..5b9b18286fae 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -95,7 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
- case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER";
+ case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 31f8210f40f0..97ece8b9248a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -921,7 +921,8 @@ void SelectionDAGISel::DoInstructionSelection() {
bool SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
- const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
+ const TargetRegisterClass *PtrRC =
+ TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
@@ -1931,7 +1932,8 @@ SDNode
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
unsigned Reg =
- TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0));
+ TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
+ *CurDAG);
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
@@ -1944,7 +1946,8 @@ SDNode
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
- Op->getOperand(2).getValueType());
+ Op->getOperand(2).getValueType(),
+ *CurDAG);
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
@@ -2329,21 +2332,23 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
-CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const TargetLowering *TLI) {
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (N.getValueType() == VT) return true;
// Handle the case when VT is iPTR.
- return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy();
+ return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const TargetLowering *TLI, unsigned ChildNo) {
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL,
+ unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
- return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI,
+ DL);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -2355,13 +2360,13 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const TargetLowering *TLI) {
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
// Handle the case when VT is iPTR.
- return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy();
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -2444,7 +2449,8 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Result = !::CheckOpcode(Table, Index, N.getNode());
return Index;
case SelectionDAGISel::OPC_CheckType:
- Result = !::CheckType(Table, Index, N, SDISel.TLI);
+ Result = !::CheckType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
return Index;
case SelectionDAGISel::OPC_CheckChild0Type:
case SelectionDAGISel::OPC_CheckChild1Type:
@@ -2454,15 +2460,16 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckChild5Type:
case SelectionDAGISel::OPC_CheckChild6Type:
case SelectionDAGISel::OPC_CheckChild7Type:
- Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
- Table[Index - 1] -
- SelectionDAGISel::OPC_CheckChild0Type);
+ Result = !::CheckChildType(
+ Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(),
+ Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type);
return Index;
case SelectionDAGISel::OPC_CheckCondCode:
Result = !::CheckCondCode(Table, Index, N);
return Index;
case SelectionDAGISel::OPC_CheckValueType:
- Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
return Index;
case SelectionDAGISel::OPC_CheckInteger:
Result = !::CheckInteger(Table, Index, N);
@@ -2816,7 +2823,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
continue;
case OPC_CheckType:
- if (!::CheckType(MatcherTable, MatcherIndex, N, TLI))
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
break;
continue;
@@ -2864,7 +2872,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (CaseVT == MVT::iPTR)
- CaseVT = TLI->getPointerTy();
+ CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
@@ -2887,14 +2895,16 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckChild4Type: case OPC_CheckChild5Type:
case OPC_CheckChild6Type: case OPC_CheckChild7Type:
if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
- Opcode-OPC_CheckChild0Type))
+ CurDAG->getDataLayout(),
+ Opcode - OPC_CheckChild0Type))
break;
continue;
case OPC_CheckCondCode:
if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
continue;
case OPC_CheckValueType:
- if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI))
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
break;
continue;
case OPC_CheckInteger:
@@ -3097,7 +3107,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (VT == MVT::iPTR)
- VT = TLI->getPointerTy().SimpleTy;
+ VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy;
VTs.push_back(VT);
}
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index bd40cac95543..34688df4765b 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -337,9 +337,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
// TODO: To eliminate this problem we can remove gc.result intrinsics
  // completely and make the statepoint call return a tuple.
unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType());
- RegsForValue RFV(*Builder.DAG.getContext(),
- Builder.DAG.getTargetLoweringInfo(), Reg,
- ISP.getActualReturnType());
+ RegsForValue RFV(
+ *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(),
+ Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType());
SDValue Chain = Builder.DAG.getEntryNode();
RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain,
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e7722b392a81..fbf651277c7f 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -102,7 +102,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
}
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
- SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -206,14 +207,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
- SDValue Tmp = DAG.getNode(ISD::SETCC, dl,
- getSetCCResultType(*DAG.getContext(), RetVT),
- NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ SDValue Tmp = DAG.getNode(
+ ISD::SETCC, dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
dl).first;
- NewLHS = DAG.getNode(ISD::SETCC, dl,
- getSetCCResultType(*DAG.getContext(), RetVT), NewLHS,
- NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(
+ ISD::SETCC, dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
@@ -242,7 +245,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
- return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
return Table;
}
@@ -265,9 +268,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// In dynamic-no-pic mode, assume that known defined values are safe.
if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
- GA &&
- !GA->getGlobal()->isDeclaration() &&
- !GA->getGlobal()->isWeakForLinker())
+ GA && GA->getGlobal()->isStrongDefinitionForLinker())
return true;
// Otherwise assume nothing is safe.
@@ -383,6 +384,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
"Mask size mismatches value type size!");
APInt NewMask = DemandedMask;
SDLoc dl(Op);
+ auto &DL = TLO.DAG.getDataLayout();
// Don't know anything.
KnownZero = KnownOne = APInt(BitWidth, 0);
@@ -645,7 +647,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
unsigned InnerBits = InnerVT.getSizeInBits();
if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
- EVT ShTy = getShiftAmountTy(InnerVT);
+ EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
@@ -824,7 +826,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// for scalar types after legalization.
EVT ShiftAmtTy = Op.getValueType();
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
- ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl,
ShiftAmtTy);
@@ -1009,8 +1011,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue Shift = In.getOperand(1);
if (TLO.LegalTypes()) {
uint64_t ShVal = ShAmt->getZExtValue();
- Shift =
- TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(Op.getValueType()));
+ Shift = TLO.DAG.getConstant(ShVal, dl,
+ getShiftAmountTy(Op.getValueType(), DL));
}
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
@@ -1400,7 +1402,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if ((newMask & Mask) == Mask) {
- if (!getDataLayout()->isLittleEndian())
+ if (!DAG.getDataLayout().isLittleEndian())
bestOffset = (origWidth/width - offset - 1) * (width/8);
else
bestOffset = (uint64_t)offset * (width/8);
@@ -1473,7 +1475,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) {
- EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT);
+ EVT NewSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
@@ -1692,11 +1695,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == N0.getValueType() ||
(isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
- N0.getOpcode() == ISD::AND)
+ N0.getOpcode() == ISD::AND) {
+ auto &DL = DAG.getDataLayout();
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = DCI.isBeforeLegalize() ?
- getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT ShiftTy = DCI.isBeforeLegalize()
+ ? getPointerTy(DL)
+ : getShiftAmountTy(N0.getValueType(), DL);
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
if (AndRHS->getAPIntValue().isPowerOf2()) {
@@ -1716,6 +1721,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
}
+ }
if (C1.getMinSignedBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
@@ -1727,8 +1733,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
- EVT ShiftTy = DCI.isBeforeLegalize() ?
- getPointerTy() : getShiftAmountTy(N0.getValueType());
+ auto &DL = DAG.getDataLayout();
+ EVT ShiftTy = DCI.isBeforeLegalize()
+ ? getPointerTy(DL)
+ : getShiftAmountTy(N0.getValueType(), DL);
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
DAG.getConstant(ShiftBits, dl,
@@ -1757,8 +1765,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
NewC = NewC.lshr(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue())) {
- EVT ShiftTy = DCI.isBeforeLegalize() ?
- getPointerTy() : getShiftAmountTy(N0.getValueType());
+ auto &DL = DAG.getDataLayout();
+ EVT ShiftTy = DCI.isBeforeLegalize()
+ ? getPointerTy(DL)
+ : getShiftAmountTy(N0.getValueType(), DL);
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
@@ -1945,10 +1955,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond);
if (N0.getNode()->hasOneUse()) {
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ auto &DL = DAG.getDataLayout();
// (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
- DAG.getConstant(1, dl,
- getShiftAmountTy(N1.getValueType())));
+ SDValue SH = DAG.getNode(
+ ISD::SHL, dl, N1.getValueType(), N1,
+ DAG.getConstant(1, dl,
+ getShiftAmountTy(N1.getValueType(), DL)));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
@@ -1969,10 +1981,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(0, dl, N1.getValueType()), Cond);
if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ auto &DL = DAG.getDataLayout();
// X == (Z-X) --> X<<1 == Z
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
- DAG.getConstant(1, dl,
- getShiftAmountTy(N0.getValueType())));
+ SDValue SH = DAG.getNode(
+ ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL)));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
@@ -2105,9 +2118,8 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
// Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//
-
TargetLowering::ConstraintType
-TargetLowering::getConstraintType(const std::string &Constraint) const {
+TargetLowering::getConstraintType(StringRef Constraint) const {
unsigned S = Constraint.size();
if (S == 1) {
@@ -2140,7 +2152,7 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
}
if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
- if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}"
+ if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
return C_Memory;
return C_Register;
}
@@ -2206,8 +2218,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
C ? SDLoc(C) : SDLoc(),
Op.getValueType(), Offs));
- return;
}
+ return;
}
if (C) { // just C, no GV.
// Simple constants are not allowed for 's'.
@@ -2217,8 +2229,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// ScheduleDAGSDNodes::EmitNode, which is very generic.
Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
SDLoc(C), MVT::i64));
- return;
}
+ return;
}
break;
}
@@ -2227,7 +2239,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
@@ -2293,7 +2305,8 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
-TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI,
+TargetLowering::ParseConstraints(const DataLayout &DL,
+ const TargetRegisterInfo *TRI,
ImmutableCallSite CS) const {
/// ConstraintOperands - Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
@@ -2329,10 +2342,11 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI,
assert(!CS.getType()->isVoidTy() &&
"Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo));
+ OpInfo.ConstraintVT =
+ getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getSimpleValueType(CS.getType());
+ OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
}
++ResNo;
break;
@@ -2361,7 +2375,7 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI,
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy);
+ unsigned BitSize = DL.getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
@@ -2375,8 +2389,7 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI,
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
- unsigned PtrSize
- = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace());
+ unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
} else {
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
@@ -2684,7 +2697,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
if (ShAmt) {
// TODO: For UDIV use SRL instead of SRA.
SDValue Amt =
- DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType()));
+ DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(),
+ DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
@@ -2750,17 +2764,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
Created->push_back(Q.getNode());
}
+ auto &DL = DAG.getDataLayout();
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
- Q = DAG.getNode(ISD::SRA, dl, VT, Q,
- DAG.getConstant(magics.s, dl,
- getShiftAmountTy(Q.getValueType())));
+ Q = DAG.getNode(
+ ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
Created->push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
- SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
- getShiftAmountTy(Q.getValueType())));
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
+ getShiftAmountTy(Q.getValueType(), DL)));
Created->push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
@@ -2776,6 +2792,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ auto &DL = DAG.getDataLayout();
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
@@ -2792,9 +2809,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
// the divided value upfront.
if (magics.a != 0 && !Divisor[0]) {
unsigned Shift = Divisor.countTrailingZeros();
- Q = DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(Shift, dl,
- getShiftAmountTy(Q.getValueType())));
+ Q = DAG.getNode(
+ ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
Created->push_back(Q.getNode());
// Get magic number for the shifted divisor.
@@ -2819,21 +2836,22 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
if (magics.a == 0) {
assert(magics.s < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- return DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(magics.s, dl,
- getShiftAmountTy(Q.getValueType())));
+ return DAG.getNode(
+ ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
Created->push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(1, dl,
- getShiftAmountTy(NPQ.getValueType())));
+ NPQ = DAG.getNode(
+ ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
Created->push_back(NPQ.getNode());
- return DAG.getNode(ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(magics.s - 1, dl,
- getShiftAmountTy(NPQ.getValueType())));
+ return DAG.getNode(
+ ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s - 1, dl,
+ getShiftAmountTy(NPQ.getValueType(), DL)));
}
}
@@ -2919,8 +2937,9 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
if (!LH.getNode() && !RH.getNode() &&
isOperationLegalOrCustom(ISD::SRL, VT) &&
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ auto &DL = DAG.getDataLayout();
unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
- SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT));
+ SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL));
LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
@@ -2980,14 +2999,15 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
- SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
- DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT)));
+ auto &DL = DAG.getDataLayout();
+ SDValue ExponentBits = DAG.getNode(
+ ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
- SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
- DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT)));
+ SDValue Sign = DAG.getNode(
+ ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
@@ -2996,17 +3016,17 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
R = DAG.getZExtOrTrunc(R, dl, NVT);
-
- R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
- DAG.getNode(ISD::SHL, dl, NVT, R,
- DAG.getZExtOrTrunc(
- DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
- dl, getShiftAmountTy(IntVT))),
- DAG.getNode(ISD::SRL, dl, NVT, R,
- DAG.getZExtOrTrunc(
- DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
- dl, getShiftAmountTy(IntVT))),
- ISD::SETGT);
+ R = DAG.getSelectCC(
+ dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, getShiftAmountTy(IntVT, DL))),
+ DAG.getNode(ISD::SRL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, getShiftAmountTy(IntVT, DL))),
+ ISD::SETGT);
SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
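
The expandFP_TO_SINT hunk builds the conversion out of mask, shift and compare nodes, with the shift-amount types now taken from the DataLayout. The bit layout it relies on is ordinary IEEE-754. Below is a standalone sketch of the same sign/exponent extraction on a binary64 value using host arithmetic rather than DAG nodes; the constants are the usual binary64 field positions, not values taken from the patch.

// Sketch only: the bit-level view behind the DAG sequence above.
#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  double X = -123.5;
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // the ISD::BITCAST step

  const uint64_t SignMask = 1ULL << 63;
  const uint64_t ExponentMask = 0x7FFULL << 52;
  const int ExponentBias = 1023;

  int Sign = (Bits & SignMask) ? -1 : 1;
  int Exponent = int((Bits & ExponentMask) >> 52) - ExponentBias;
  uint64_t Mantissa = (Bits & ((1ULL << 52) - 1)) | (1ULL << 52); // implicit 1

  std::cout << "sign=" << Sign << " exponent=" << Exponent << " mantissa=0x"
            << std::hex << Mantissa << "\n";
}
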
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index 0e89bad5f26f..00db94256844 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -15,9 +15,5 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL)
- : DL(DL) {
-}
-
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
}
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 116fd5be0337..d236e1f5ab6f 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -45,7 +45,6 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
- const TargetMachine *TM;
Type *doubleUnderDataTy;
Type *doubleUnderJBufTy;
Type *FunctionContextTy;
@@ -63,7 +62,7 @@ class SjLjEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {}
+ explicit SjLjEHPrepare() : FunctionPass(ID) {}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@@ -83,11 +82,11 @@ private:
} // end anonymous namespace
char SjLjEHPrepare::ID = 0;
+INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions",
+ false, false)
// Public Interface To the SjLjEHPrepare pass.
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) {
- return new SjLjEHPrepare(TM);
-}
+FunctionPass *llvm::createSjLjEHPreparePass() { return new SjLjEHPrepare(); }
// doInitialization - Set up declarations and types needed to process
// exceptions.
bool SjLjEHPrepare::doInitialization(Module &M) {
@@ -196,9 +195,8 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
- const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
- unsigned Align =
- TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
+ auto &DL = F.getParent()->getDataLayout();
+ unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
EntryBB->begin());
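
SjLjEHPrepare no longer keeps a TargetMachine around just to reach a DataLayout; the module already carries one. A small example of querying a preferred alignment that way is sketched below, assuming an LLVM tree of roughly this vintage; the layout string and the struct type are made up for the example and are not the pass's real FunctionContextTy.

// Sketch only: assumes an LLVM build of approximately this era.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("example", Ctx);
  // A layout string similar to x86-64: 64-bit pointers, natural alignments.
  M.setDataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");

  // A stand-in for a function-context-like struct: { i32, [5 x i8*] }.
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *I8Ptr = Type::getInt8PtrTy(Ctx);
  StructType *FnCtxTy = StructType::get(Ctx, {I32, ArrayType::get(I8Ptr, 5)});

  // The pass now asks the module's DataLayout instead of the TargetLowering.
  const DataLayout &DL = M.getDataLayout();
  unsigned Align = DL.getPrefTypeAlignment(FnCtxTy);
  outs() << "preferred alignment = " << Align << "\n";
}
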
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index d88be575d56c..855058358fe4 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -49,7 +49,6 @@ namespace {
/// information provided by this pass is optional and not required by the
/// aforementioned intrinsic to function.
class StackMapLiveness : public MachineFunctionPass {
- MachineFunction *MF;
const TargetRegisterInfo *TRI;
LivePhysRegs LiveRegs;
@@ -68,14 +67,14 @@ public:
private:
/// \brief Performs the actual liveness calculation for the function.
- bool calculateLiveness();
+ bool calculateLiveness(MachineFunction &MF);
/// \brief Add the current register live set to the instruction.
- void addLiveOutSetToMI(MachineInstr &MI);
+ void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI);
/// \brief Create a register mask and initialize it with the registers from
/// the register live set.
- uint32_t *createRegisterMask() const;
+ uint32_t *createRegisterMask(MachineFunction &MF) const;
};
} // namespace
@@ -95,8 +94,7 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
// We preserve all information.
AU.setPreservesAll();
AU.setPreservesCFG();
- // Default dependencie for all MachineFunction passes.
- AU.addRequired<MachineFunctionAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
/// Calculate the liveness information for the given machine function.
@@ -106,7 +104,6 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName()
<< " **********\n");
- this->MF = &MF;
TRI = MF.getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
@@ -115,25 +112,23 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
++NumStackMapFuncSkipped;
return false;
}
- return calculateLiveness();
+ return calculateLiveness(MF);
}
/// Performs the actual liveness calculation for the function.
-bool StackMapLiveness::calculateLiveness() {
+bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
bool HasChanged = false;
// For all basic blocks in the function.
- for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
- MBBI != MBBE; ++MBBI) {
- DEBUG(dbgs() << "****** BB " << MBBI->getName() << " ******\n");
+ for (auto &MBB : MF) {
+ DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
LiveRegs.init(TRI);
- LiveRegs.addLiveOuts(MBBI);
+ LiveRegs.addLiveOuts(&MBB);
bool HasStackMap = false;
// Reverse iterate over all instructions and add the current live register
// set to an instruction if we encounter a patchpoint instruction.
- for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(),
- E = MBBI->rend(); I != E; ++I) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
- addLiveOutSetToMI(*I);
+ addLiveOutSetToMI(MF, *I);
HasChanged = true;
HasStackMap = true;
++NumStackMaps;
@@ -149,21 +144,23 @@ bool StackMapLiveness::calculateLiveness() {
}
/// Add the current register live set to the instruction.
-void StackMapLiveness::addLiveOutSetToMI(MachineInstr &MI) {
- uint32_t *Mask = createRegisterMask();
+void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF,
+ MachineInstr &MI) {
+ uint32_t *Mask = createRegisterMask(MF);
MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask);
- MI.addOperand(*MF, MO);
+ MI.addOperand(MF, MO);
}
/// Create a register mask and initialize it with the registers from the
/// register live set.
-uint32_t *StackMapLiveness::createRegisterMask() const {
+uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const {
// The mask is owned and cleaned up by the Machine Function.
- uint32_t *Mask = MF->allocateRegisterMask(TRI->getNumRegs());
- for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end();
- RI != RE; ++RI)
- Mask[*RI / 32] |= 1U << (*RI % 32);
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ for (auto Reg : LiveRegs)
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // Give the target a chance to adjust the mask.
TRI->adjustStackMapLiveOutMask(Mask);
+
return Mask;
}
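
createRegisterMask() packs one bit per physical register into 32-bit words, which is exactly the Mask[Reg / 32] |= 1U << (Reg % 32) line above. The same layout in a self-contained sketch, with invented names and sample register numbers:

// Sketch only: the word/bit packing used for stackmap live-out masks.
#include <cstdint>
#include <iostream>
#include <vector>

static std::vector<uint32_t>
makeRegisterMask(unsigned NumRegs, const std::vector<unsigned> &Live) {
  std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0);
  for (unsigned Reg : Live)
    Mask[Reg / 32] |= 1u << (Reg % 32);
  return Mask;
}

int main() {
  auto Mask = makeRegisterMask(64, {3, 34, 63});
  std::cout << std::hex << Mask[0] << " " << Mask[1] << "\n"; // 8 80000004
}
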
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 1e8e03f9a7df..116eef66c580 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -29,17 +29,17 @@ using namespace llvm;
#define DEBUG_TYPE "stackmaps"
-static cl::opt<int> StackMapVersion("stackmap-version", cl::init(1),
- cl::desc("Specify the stackmap encoding version (default = 1)"));
+static cl::opt<int> StackMapVersion(
+ "stackmap-version", cl::init(1),
+ cl::desc("Specify the stackmap encoding version (default = 1)"));
const char *StackMaps::WSMP = "Stack Maps: ";
PatchPointOpers::PatchPointOpers(const MachineInstr *MI)
- : MI(MI),
- HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
- !MI->getOperand(0).isImplicit()),
- IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg)
-{
+ : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ !MI->getOperand(0).isImplicit()),
+ IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() ==
+ CallingConv::AnyReg) {
#ifndef NDEBUG
unsigned CheckStartIdx = 0, e = MI->getNumOperands();
while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
@@ -76,30 +76,31 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
/// Go up the super-register chain until we hit a valid dwarf register number.
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
- int RegNo = TRI->getDwarfRegNum(Reg, false);
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
- RegNo = TRI->getDwarfRegNum(*SR, false);
+ int RegNum = TRI->getDwarfRegNum(Reg, false);
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNum < 0; ++SR)
+ RegNum = TRI->getDwarfRegNum(*SR, false);
- assert(RegNo >= 0 && "Invalid Dwarf register number.");
- return (unsigned) RegNo;
+ assert(RegNum >= 0 && "Invalid Dwarf register number.");
+ return (unsigned)RegNum;
}
MachineInstr::const_mop_iterator
StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
- MachineInstr::const_mop_iterator MOE,
- LocationVec &Locs, LiveOutVec &LiveOuts) const {
+ MachineInstr::const_mop_iterator MOE, LocationVec &Locs,
+ LiveOutVec &LiveOuts) const {
const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
if (MOI->isImm()) {
switch (MOI->getImm()) {
- default: llvm_unreachable("Unrecognized operand type.");
+ default:
+ llvm_unreachable("Unrecognized operand type.");
case StackMaps::DirectMemRefOp: {
unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Direct, Size,
- getDwarfRegNum(Reg, TRI), Imm));
+ Locs.emplace_back(StackMaps::Location::Direct, Size,
+ getDwarfRegNum(Reg, TRI), Imm);
break;
}
case StackMaps::IndirectMemRefOp: {
@@ -107,15 +108,15 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(Size > 0 && "Need a valid size for indirect memory locations.");
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Indirect, Size,
- getDwarfRegNum(Reg, TRI), Imm));
+ Locs.emplace_back(StackMaps::Location::Indirect, Size,
+ getDwarfRegNum(Reg, TRI), Imm);
break;
}
case StackMaps::ConstantOp: {
++MOI;
assert(MOI->isImm() && "Expected constant operand.");
int64_t Imm = MOI->getImm();
- Locs.push_back(Location(Location::Constant, sizeof(int64_t), 0, Imm));
+ Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, Imm);
break;
}
}
@@ -137,14 +138,13 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(!MOI->getSubReg() && "Physical subreg still around.");
unsigned Offset = 0;
- unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI);
- unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
- unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg());
+ unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI);
+ unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false);
+ unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg());
if (SubRegIdx)
Offset = TRI->getSubRegIdxOffset(SubRegIdx);
- Locs.push_back(
- Location(Location::Register, RC->getSize(), RegNo, Offset));
+ Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset);
return ++MOI;
}
@@ -165,19 +165,19 @@ void StackMaps::print(raw_ostream &OS) {
OS << WSMP << "callsite " << CSI.ID << "\n";
OS << WSMP << " has " << CSLocs.size() << " locations\n";
- unsigned OperIdx = 0;
+ unsigned Idx = 0;
for (const auto &Loc : CSLocs) {
- OS << WSMP << " Loc " << OperIdx << ": ";
- switch (Loc.LocType) {
+ OS << WSMP << "\t\tLoc " << Idx << ": ";
+ switch (Loc.Type) {
case Location::Unprocessed:
OS << "<Unprocessed operand>";
break;
case Location::Register:
OS << "Register ";
- if (TRI)
- OS << TRI->getName(Loc.Reg);
- else
- OS << Loc.Reg;
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
break;
case Location::Direct:
OS << "Direct ";
@@ -203,23 +203,23 @@ void StackMaps::print(raw_ostream &OS) {
OS << "Constant Index " << Loc.Offset;
break;
}
- OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size
+ OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size
<< ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n";
- OperIdx++;
+ Idx++;
}
- OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n";
+ OS << WSMP << "\thas " << LiveOuts.size() << " live-out registers\n";
- OperIdx = 0;
+ Idx = 0;
for (const auto &LO : LiveOuts) {
- OS << WSMP << " LO " << OperIdx << ": ";
+ OS << WSMP << "\t\tLO " << Idx << ": ";
if (TRI)
OS << TRI->getName(LO.Reg);
else
OS << LO.Reg;
- OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte "
+ OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte "
<< LO.Size << "]\n";
- OperIdx++;
+ Idx++;
}
}
}
@@ -227,9 +227,9 @@ void StackMaps::print(raw_ostream &OS) {
/// Create a live-out register record for the given register Reg.
StackMaps::LiveOutReg
StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
- unsigned RegNo = getDwarfRegNum(Reg, TRI);
+ unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI);
unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
- return LiveOutReg(Reg, RegNo, Size);
+ return LiveOutReg(Reg, DwarfRegNum, Size);
}
/// Parse the register live-out mask and return a vector of live-out registers
@@ -248,11 +248,16 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
// We don't need to keep track of a register if its super-register is already
// in the list. Merge entries that refer to the same dwarf register and use
// the maximum size that needs to be spilled.
- std::sort(LiveOuts.begin(), LiveOuts.end());
- for (LiveOutVec::iterator I = LiveOuts.begin(), E = LiveOuts.end();
- I != E; ++I) {
- for (LiveOutVec::iterator II = std::next(I); II != E; ++II) {
- if (I->RegNo != II->RegNo) {
+
+ std::sort(LiveOuts.begin(), LiveOuts.end(),
+ [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
+ // Only sort by the dwarf register number.
+ return LHS.DwarfRegNum < RHS.DwarfRegNum;
+ });
+
+ for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
+ for (auto II = std::next(I); II != E; ++II) {
+ if (I->DwarfRegNum != II->DwarfRegNum) {
// Skip all the now invalid entries.
I = --II;
break;
@@ -260,11 +265,15 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
I->Size = std::max(I->Size, II->Size);
if (TRI->isSuperRegister(I->Reg, II->Reg))
I->Reg = II->Reg;
- II->MarkInvalid();
+ II->Reg = 0; // mark for deletion.
}
}
- LiveOuts.erase(std::remove_if(LiveOuts.begin(), LiveOuts.end(),
- LiveOutReg::IsInvalid), LiveOuts.end());
+
+ LiveOuts.erase(
+ std::remove_if(LiveOuts.begin(), LiveOuts.end(),
+ [](const LiveOutReg &LO) { return LO.Reg == 0; }),
+ LiveOuts.end());
+
return LiveOuts;
}
@@ -282,8 +291,8 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
if (recordResult) {
assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value.");
- parseOperand(MI.operands_begin(), std::next(MI.operands_begin()),
- Locations, LiveOuts);
+ parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), Locations,
+ LiveOuts);
}
// Parse operands.
@@ -292,33 +301,31 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
}
// Move large constants into the constant pool.
- for (LocationVec::iterator I = Locations.begin(), E = Locations.end();
- I != E; ++I) {
+ for (auto &Loc : Locations) {
// Constants are encoded as sign-extended integers.
// -1 is directly encoded as .long 0xFFFFFFFF with no constant pool.
- if (I->LocType == Location::Constant && !isInt<32>(I->Offset)) {
- I->LocType = Location::ConstantIndex;
+ if (Loc.Type == Location::Constant && !isInt<32>(Loc.Offset)) {
+ Loc.Type = Location::ConstantIndex;
// ConstPool is intentionally a MapVector of 'uint64_t's (as
// opposed to 'int64_t's). We should never be in a situation
// where we have to insert either the tombstone or the empty
// keys into a map, and for a DenseMap<uint64_t, T> these are
// (uint64_t)0 and (uint64_t)-1. They can be and are
// represented using 32 bit integers.
-
- assert((uint64_t)I->Offset != DenseMapInfo<uint64_t>::getEmptyKey() &&
- (uint64_t)I->Offset != DenseMapInfo<uint64_t>::getTombstoneKey() &&
+ assert((uint64_t)Loc.Offset != DenseMapInfo<uint64_t>::getEmptyKey() &&
+ (uint64_t)Loc.Offset !=
+ DenseMapInfo<uint64_t>::getTombstoneKey() &&
"empty and tombstone keys should fit in 32 bits!");
- auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset));
- I->Offset = Result.first - ConstPool.begin();
+ auto Result = ConstPool.insert(std::make_pair(Loc.Offset, Loc.Offset));
+ Loc.Offset = Result.first - ConstPool.begin();
}
}
// Create an expression to calculate the offset of the callsite from function
// entry.
const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(MILabel, OutContext),
- MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext),
- OutContext);
+ MCSymbolRefExpr::create(MILabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
std::move(LiveOuts));
@@ -326,10 +333,10 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
// Record the stack size of the current function.
const MachineFrameInfo *MFI = AP.MF->getFrameInfo();
const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
- const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
- RegInfo->needsStackRealignment(*(AP.MF));
+ bool HasDynamicFrameSize =
+ MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF));
FnStackSize[AP.CurrentFnSym] =
- DynamicFrameSize ? UINT64_MAX : MFI->getStackSize();
+ HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize();
}
void StackMaps::recordStackMap(const MachineInstr &MI) {
@@ -346,25 +353,23 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
PatchPointOpers opers(&MI);
int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
- MachineInstr::const_mop_iterator MOI =
- std::next(MI.operands_begin(), opers.getStackMapStartIdx());
+ auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx());
recordStackMapOpers(MI, ID, MOI, MI.operands_end(),
opers.isAnyReg() && opers.hasDef());
#ifndef NDEBUG
// verify anyregcc
- LocationVec &Locations = CSInfos.back().Locations;
+ auto &Locations = CSInfos.back().Locations;
if (opers.isAnyReg()) {
unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm();
- for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i)
- assert(Locations[i].LocType == Location::Register &&
+ for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i)
+ assert(Locations[i].Type == Location::Register &&
"anyreg arg must be in reg.");
}
#endif
}
void StackMaps::recordStatepoint(const MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::STATEPOINT &&
- "expected statepoint");
+ assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
StatepointOpers opers(&MI);
// Record all the deopt and gc operands (they're contiguous and run from the
@@ -387,8 +392,8 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) {
void StackMaps::emitStackmapHeader(MCStreamer &OS) {
// Header.
OS.EmitIntValue(StackMapVersion, 1); // Version.
- OS.EmitIntValue(0, 1); // Reserved.
- OS.EmitIntValue(0, 2); // Reserved.
+ OS.EmitIntValue(0, 1); // Reserved.
+ OS.EmitIntValue(0, 2); // Reserved.
// Num functions.
DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n');
@@ -412,7 +417,7 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
DEBUG(dbgs() << WSMP << "functions:\n");
for (auto const &FR : FnStackSize) {
DEBUG(dbgs() << WSMP << "function addr: " << FR.first
- << " frame size: " << FR.second);
+ << " frame size: " << FR.second);
OS.EmitSymbolValue(FR.first, 8);
OS.EmitIntValue(FR.second, 8);
}
@@ -424,7 +429,7 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
// Constant pool entries.
DEBUG(dbgs() << WSMP << "constants:\n");
- for (auto ConstEntry : ConstPool) {
+ for (const auto &ConstEntry : ConstPool) {
DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');
OS.EmitIntValue(ConstEntry.second, 8);
}
@@ -489,7 +494,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
OS.EmitIntValue(CSLocs.size(), 2);
for (const auto &Loc : CSLocs) {
- OS.EmitIntValue(Loc.LocType, 1);
+ OS.EmitIntValue(Loc.Type, 1);
OS.EmitIntValue(Loc.Size, 1);
OS.EmitIntValue(Loc.Reg, 2);
OS.EmitIntValue(Loc.Offset, 4);
@@ -500,7 +505,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
OS.EmitIntValue(LiveOuts.size(), 2);
for (const auto &LO : LiveOuts) {
- OS.EmitIntValue(LO.RegNo, 2);
+ OS.EmitIntValue(LO.DwarfRegNum, 2);
OS.EmitIntValue(0, 1);
OS.EmitIntValue(LO.Size, 1);
}
@@ -511,7 +516,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
/// Serialize the stackmap data.
void StackMaps::serializeToStackMapSection() {
- (void) WSMP;
+ (void)WSMP;
// Bail out if there's no stack map data.
assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) &&
"Expected empty constant pool too!");
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 0824d6f91db0..bcea37a3aafa 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -122,7 +122,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
// If an array has more than SSPBufferSize bytes of allocated space, then we
// emit stack protectors.
- if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) {
+ if (SSPBufferSize <= M->getDataLayout().getTypeAllocSize(AT)) {
IsLarge = true;
return true;
}
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 56383247eadb..f3cccd82a5c5 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -11,9 +11,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -54,3 +57,30 @@ bool TargetFrameLowering::needsFrameIndexResolution(
const MachineFunction &MF) const {
return MF.getFrameInfo()->hasStackObjects();
}
+
+void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ // Get the callee saved register list...
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+
+ // Early exit if there are no callee saved registers.
+ if (!CSRegs || CSRegs[0] == 0)
+ return;
+
+ SavedRegs.resize(TRI.getNumRegs());
+
+ // In Naked functions we aren't going to save any registers.
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ return;
+
+ // Functions which call __builtin_unwind_init get all their registers saved.
+ bool CallsUnwindInit = MF.getMMI().callsUnwindInit();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (CallsUnwindInit || MRI.isPhysRegModified(Reg))
+ SavedRegs.set(Reg);
+ }
+}
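
The new default determineCalleeSaves() walks the target's null-terminated callee-saved list and marks a register when the function actually modifies it, marks everything when the function calls __builtin_unwind_init, and saves nothing for naked functions. The same control flow is sketched below with plain containers standing in for BitVector and MachineRegisterInfo; every name is invented.

// Sketch only: the shape of the default callee-saved computation.
#include <iostream>
#include <set>
#include <vector>

static std::vector<bool>
determineCalleeSaves(const unsigned *CSRegs, unsigned NumRegs,
                     const std::set<unsigned> &Modified, bool CallsUnwindInit,
                     bool IsNaked) {
  std::vector<bool> SavedRegs(NumRegs, false);
  if (!CSRegs || CSRegs[0] == 0 || IsNaked)
    return SavedRegs; // nothing to save
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (CallsUnwindInit || Modified.count(Reg))
      SavedRegs[Reg] = true;
  }
  return SavedRegs;
}

int main() {
  const unsigned CSRegs[] = {3, 4, 5, 0}; // 0-terminated, like MCPhysReg lists
  auto Saved = determineCalleeSaves(CSRegs, 8, {4}, /*CallsUnwindInit=*/false,
                                    /*IsNaked=*/false);
  for (unsigned R = 0; R < Saved.size(); ++R)
    if (Saved[R])
      std::cout << "save r" << R << "\n"; // save r4
}
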
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 78492a6e8818..ecfd65931574 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -750,7 +750,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
initActions();
// Perform these initializations only once.
- IsLittleEndian = getDataLayout()->isLittleEndian();
MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
= MaxStoresPerMemmoveOptSize = 4;
@@ -879,28 +878,17 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
}
-MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
- return MVT::getIntegerVT(getPointerSizeInBits(AS));
+MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
+ EVT) const {
+ return MVT::getIntegerVT(8 * DL.getPointerSize(0));
}
-unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
- return getDataLayout()->getPointerSizeInBits(AS);
-}
-
-unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
- assert(Ty->isPointerTy());
- return getPointerSizeInBits(Ty->getPointerAddressSpace());
-}
-
-MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0));
-}
-
-EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy,
+ const DataLayout &DL) const {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return getScalarShiftAmountTy(LHSTy);
+ return getScalarShiftAmountTy(DL, LHSTy);
}
/// canOpTrap - Returns true if the operation can trap for the value type.
@@ -1398,9 +1386,10 @@ void TargetLoweringBase::computeRegisterProperties(
}
}
-EVT TargetLoweringBase::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
assert(!VT.isVector() && "No default SetCC type for vectors!");
- return getPointerTy(0).SimpleTy;
+ return getPointerTy(DL).SimpleTy;
}
MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
@@ -1485,11 +1474,11 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
+void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, const DataLayout &DL) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
@@ -1534,8 +1523,9 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
-unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
- return getDataLayout()->getABITypeAlignment(Ty);
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
+ return DL.getABITypeAlignment(Ty);
}
//===----------------------------------------------------------------------===//
@@ -1614,9 +1604,10 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
}
std::pair<unsigned, MVT>
-TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
+ Type *Ty) const {
LLVMContext &C = Ty->getContext();
- EVT MTy = getValueType(Ty);
+ EVT MTy = getValueType(DL, Ty);
unsigned Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
@@ -1642,8 +1633,8 @@ TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
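
The TargetLoweringBase hunks replace cached DataLayout lookups with an explicit parameter; the default scalar shift-amount type, for instance, is simply an integer as wide as an address-space-0 pointer. A trivial standalone sketch of that computation, with an invented layout stand-in:

// Sketch only: shift-amount width derived from the pointer size.
#include <iostream>

struct DataLayoutLike {
  unsigned PointerSizeBytes; // what DL.getPointerSize(0) would report
};

static unsigned scalarShiftAmountBits(const DataLayoutLike &DL) {
  return 8 * DL.PointerSizeBytes; // mirrors MVT::getIntegerVT(8 * size)
}

int main() {
  std::cout << scalarShiftAmountBits({8}) << "\n"; // 64 on a 64-bit target
  std::cout << scalarShiftAmountBits({4}) << "\n"; // 32 on a 32-bit target
}
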
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index e84bea63995e..1e30821dc741 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1215,11 +1215,11 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// addl %esi, %edi
// movl %edi, %eax
// ret
- bool commuted = false;
+ bool Commuted = false;
// If it's profitable to commute, try to do so.
if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
- commuted = true;
+ Commuted = true;
++NumCommuted;
if (AggressiveCommute)
++NumAggrCommuted;
@@ -1232,7 +1232,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule this MI below it.
- if (!commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
+ if (!Commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
++NumReSchedDowns;
return true;
}
@@ -1250,7 +1250,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
}
// Return if it is commuted but 3 addr conversion is failed.
- if (commuted)
+ if (Commuted)
return false;
// If there is one more use of regB later in the same MBB, consider
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 2912bdd63426..02341b4d66b8 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -163,7 +163,6 @@ class VirtRegRewriter : public MachineFunctionPass {
SlotIndexes *Indexes;
LiveIntervals *LIS;
VirtRegMap *VRM;
- SparseSet<unsigned> PhysRegs;
void rewrite();
void addMBBLiveIns();
@@ -319,54 +318,15 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
- SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;
-
- // Here we have a SparseSet to hold which PhysRegs are actually encountered
- // in the MF we are about to iterate over so that later when we call
- // setPhysRegUsed, we are only doing it for physRegs that were actually found
- // in the program and not for all of the possible physRegs for the given
- // target architecture. If the target has a lot of physRegs, then for a small
- // program there will be a significant compile time reduction here.
- PhysRegs.clear();
- PhysRegs.setUniverse(TRI->getNumRegs());
-
- // The function with uwtable should guarantee that the stack unwinder
- // can unwind the stack to the previous frame. Thus, we can't apply the
- // noreturn optimization if the caller function has uwtable attribute.
- bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
- bool IsExitBB = MBBI->succ_empty();
for (MachineBasicBlock::instr_iterator
MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = MII;
++MII;
- // Check if this instruction is a call to a noreturn function. If this
- // is a call to noreturn function and we don't need the stack unwinding
- // functionality (i.e. this function does not have uwtable attribute and
- // the callee function has the nounwind attribute), then we can ignore
- // the definitions set by this instruction.
- if (!HasUWTable && IsExitBB && MI->isCall()) {
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
- if (!MO.isGlobal())
- continue;
- const Function *Func = dyn_cast<Function>(MO.getGlobal());
- if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
- // We need to keep correct unwind information
- // even if the function will not return, since the
- // runtime may need it.
- !Func->hasFnAttribute(Attribute::NoUnwind))
- continue;
- NoReturnInsts.insert(MI);
- break;
- }
- }
-
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
@@ -375,15 +335,6 @@ void VirtRegRewriter::rewrite() {
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- // If we encounter a VirtReg or PhysReg then get at the PhysReg and add
- // it to the physreg bitset. Later we use only the PhysRegs that were
- // actually encountered in the MF to populate the MRI's used physregs.
- if (MO.isReg() && MO.getReg())
- PhysRegs.insert(
- TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
- VRM->getPhys(MO.getReg()) :
- MO.getReg());
-
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
@@ -470,29 +421,5 @@ void VirtRegRewriter::rewrite() {
}
}
}
-
- // Tell MRI about physical registers in use.
- if (NoReturnInsts.empty()) {
- for (SparseSet<unsigned>::iterator
- RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
- if (!MRI->reg_nodbg_empty(*RegI))
- MRI->setPhysRegUsed(*RegI);
- } else {
- for (SparseSet<unsigned>::iterator
- I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
- unsigned Reg = *I;
- if (MRI->reg_nodbg_empty(Reg))
- continue;
- // Check if this register has a use that will impact the rest of the
- // code. Uses in debug and noreturn instructions do not impact the
- // generated code.
- for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
- if (!NoReturnInsts.count(&It)) {
- MRI->setPhysRegUsed(Reg);
- break;
- }
- }
- }
- }
}

diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index dbc0d91a01e2..0d26ed333ca7 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -155,7 +155,7 @@ private:
// outlined but before the outlined code is pruned from the parent function.
DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks;
- // Map from outlined handler to call to llvm.frameaddress(1). Only used for
+ // Map from outlined handler to call to parent local address. Only used for
// 32-bit EH.
DenseMap<Function *, Value *> HandlerToParentFP;
@@ -533,9 +533,9 @@ void WinEHPrepare::findSEHEHReturnPoints(
BasicBlock *NextBB;
Constant *Selector;
if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) {
- // Split the edge if there is a phi node. Returning from EH to a phi node
- // is just as impossible as having a phi after an indirectbr.
- if (isa<PHINode>(CatchHandler->begin())) {
+ // Split the edge if there are multiple predecessors. This creates a place
+ // where we can insert EH recovery code.
+ if (!CatchHandler->getSinglePredecessor()) {
DEBUG(dbgs() << "splitting EH return edge from " << BB->getName()
<< " to " << CatchHandler->getName() << '\n');
BBI = CatchHandler = SplitCriticalEdge(
@@ -616,6 +616,26 @@ void WinEHPrepare::demoteValuesLiveAcrossHandlers(
// identifyEHBlocks() should have been called before this function.
assert(!NormalBlocks.empty());
+ // Try to avoid demoting EH pointer and selector values. They get in the way
+ // of our pattern matching.
+ SmallPtrSet<Instruction *, 10> EHVals;
+ for (BasicBlock &BB : F) {
+ LandingPadInst *LP = BB.getLandingPadInst();
+ if (!LP)
+ continue;
+ EHVals.insert(LP);
+ for (User *U : LP->users()) {
+ auto *EI = dyn_cast<ExtractValueInst>(U);
+ if (!EI)
+ continue;
+ EHVals.insert(EI);
+ for (User *U2 : EI->users()) {
+ if (auto *PN = dyn_cast<PHINode>(U2))
+ EHVals.insert(PN);
+ }
+ }
+ }
+
SetVector<Argument *> ArgsToDemote;
SetVector<Instruction *> InstrsToDemote;
for (BasicBlock &BB : F) {
@@ -641,7 +661,11 @@ void WinEHPrepare::demoteValuesLiveAcrossHandlers(
continue;
}
+ // Don't demote EH values.
auto *OpI = cast<Instruction>(Op);
+ if (EHVals.count(OpI))
+ continue;
+
BasicBlock *OpBB = OpI->getParent();
// If a value is produced and consumed in the same BB, we don't need to
// demote it.
@@ -822,7 +846,8 @@ bool WinEHPrepare::prepareExceptionHandlers(
LPad->replaceAllUsesWith(UndefValue::get(LPad->getType()));
// Rewrite uses of the exception pointer to loads of an alloca.
- for (Instruction *E : SEHCodeUses) {
+ while (!SEHCodeUses.empty()) {
+ Instruction *E = SEHCodeUses.pop_back_val();
SmallVector<Use *, 4> Uses;
for (Use &U : E->uses())
Uses.push_back(&U);
@@ -830,13 +855,10 @@ bool WinEHPrepare::prepareExceptionHandlers(
auto *I = cast<Instruction>(U->getUser());
if (isa<ResumeInst>(I))
continue;
- LoadInst *LI;
if (auto *Phi = dyn_cast<PHINode>(I))
- LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false,
- Phi->getIncomingBlock(*U));
+ SEHCodeUses.push_back(Phi);
else
- LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I);
- U->set(LI);
+ U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I));
}
E->replaceAllUsesWith(UndefValue::get(E->getType()));
E->eraseFromParent();
@@ -953,16 +975,16 @@ bool WinEHPrepare::prepareExceptionHandlers(
Builder.SetInsertPoint(Entry->getFirstInsertionPt());
Function *FrameEscapeFn =
- Intrinsic::getDeclaration(M, Intrinsic::frameescape);
+ Intrinsic::getDeclaration(M, Intrinsic::localescape);
Function *RecoverFrameFn =
- Intrinsic::getDeclaration(M, Intrinsic::framerecover);
+ Intrinsic::getDeclaration(M, Intrinsic::localrecover);
SmallVector<Value *, 8> AllocasToEscape;
- // Scan the entry block for an existing call to llvm.frameescape. We need to
+ // Scan the entry block for an existing call to llvm.localescape. We need to
// keep escaping those objects.
for (Instruction &I : F.front()) {
auto *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::frameescape) {
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
auto Args = II->arg_operands();
AllocasToEscape.append(Args.begin(), Args.end());
II->eraseFromParent();
@@ -971,7 +993,7 @@ bool WinEHPrepare::prepareExceptionHandlers(
}
// Finally, replace all of the temporary allocas for frame variables used in
- // the outlined handlers with calls to llvm.framerecover.
+ // the outlined handlers with calls to llvm.localrecover.
for (auto &VarInfoEntry : FrameVarInfo) {
Value *ParentVal = VarInfoEntry.first;
TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second;
@@ -992,7 +1014,7 @@ bool WinEHPrepare::prepareExceptionHandlers(
llvm::Value *FP = HandlerToParentFP[HandlerFn];
assert(FP);
- // FIXME: Sink this framerecover into the blocks where it is used.
+ // FIXME: Sink this localrecover into the blocks where it is used.
Builder.SetInsertPoint(TempAlloca);
Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
Value *RecoverArgs[] = {
@@ -1014,7 +1036,7 @@ bool WinEHPrepare::prepareExceptionHandlers(
}
} // End for each FrameVarInfo entry.
- // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry
+ // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry
// block.
Builder.SetInsertPoint(&F.getEntryBlock().back());
Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
@@ -1595,9 +1617,8 @@ void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
VMap[Extract] = SelectorValue;
}
-static bool isFrameAddressCall(const Value *V) {
- return match(const_cast<Value *>(V),
- m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0)));
+static bool isLocalAddressCall(const Value *V) {
+ return match(const_cast<Value *>(V), m_Intrinsic<Intrinsic::localaddress>());
}
CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
@@ -1639,9 +1660,9 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
return handleTypeIdFor(VMap, Inst, NewBB);
- // When outlining llvm.frameaddress(i32 0), remap that to the second argument,
+ // When outlining llvm.localaddress(), remap that to the second argument,
// which is the FP of the parent.
- if (isFrameAddressCall(Inst)) {
+ if (isLocalAddressCall(Inst)) {
VMap[Inst] = ParentFP;
return CloningDirector::SkipInstruction;
}
@@ -1961,7 +1982,7 @@ Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
// If we're asked to materialize a static alloca, we temporarily create an
// alloca in the outlined function and add this to the FrameVarInfo map. When
// all the outlining is complete, we'll replace these temporary allocas with
- // calls to llvm.framerecover.
+ // calls to llvm.localrecover.
if (auto *AV = dyn_cast<AllocaInst>(V)) {
assert(AV->isStaticAlloca() &&
"cannot materialize un-demoted dynamic alloca");
@@ -1991,7 +2012,7 @@ void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) {
// of a catch parameter, add a sentinel to the multimap to indicate that it's
// used from another handler. This will prevent us from trying to sink the
// alloca into the handler and ensure that the catch parameter is present in
- // the call to llvm.frameescape.
+ // the call to llvm.localescape.
FrameVarInfo[V].push_back(getCatchObjectSentinel());
}
@@ -2233,16 +2254,16 @@ static void createCleanupHandler(LandingPadActions &Actions,
static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
Instruction *MaybeCall) {
// Look for finally blocks that Clang has already outlined for us.
- // %fp = call i8* @llvm.frameaddress(i32 0)
+ // %fp = call i8* @llvm.localaddress()
// call void @"fin$parent"(iN 1, i8* %fp)
- if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
+ if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
MaybeCall = MaybeCall->getNextNode();
CallSite FinallyCall(MaybeCall);
if (!FinallyCall || FinallyCall.arg_size() != 2)
return CallSite();
if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
return CallSite();
- if (!isFrameAddressCall(FinallyCall.getArgument(1)))
+ if (!isLocalAddressCall(FinallyCall.getArgument(1)))
return CallSite();
return FinallyCall;
}
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index c25ddad33b76..96bcf15e0af0 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -677,7 +677,13 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
// First calculate the address of the symbol or section as it appears
// in the object file
if (Sym != Obj.symbol_end()) {
- Sym->getAddress(SymAddr);
+ ErrorOr<uint64_t> SymAddrOrErr = Sym->getAddress();
+ if (std::error_code EC = SymAddrOrErr.getError()) {
+ errs() << "error: failed to compute symbol address: "
+ << EC.message() << '\n';
+ continue;
+ }
+ SymAddr = *SymAddrOrErr;
// Also remember what section this symbol is in for later
Sym->getSection(RSec);
} else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
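
The hunk above is representative of a wider API change in this import: symbol addresses are now returned as ErrorOr<uint64_t> instead of being written through an out-parameter. A minimal, self-contained sketch of that consumption pattern follows; readValue() is a hypothetical stand-in rather than an LLVM interface, and the example assumes the LLVM support headers and library are available.

    // Sketch only: readValue() is invented; the ErrorOr<T> handling mirrors
    // the DWARFContextInMemory hunk above.
    #include "llvm/Support/ErrorOr.h"
    #include <cstdint>
    #include <iostream>
    #include <system_error>

    static llvm::ErrorOr<uint64_t> readValue(bool Succeed) {
      if (!Succeed)
        return std::make_error_code(std::errc::invalid_argument);
      return 42;
    }

    int main() {
      llvm::ErrorOr<uint64_t> ValOrErr = readValue(true);
      if (std::error_code EC = ValOrErr.getError()) {
        std::cerr << "error: failed to compute value: " << EC.message() << '\n';
        return 1;
      }
      uint64_t Val = *ValOrErr; // dereference only after the error check
      std::cout << "value = " << Val << '\n';
      return 0;
    }
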
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
index 348308897dc4..331d2141b0e2 100644
--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -3,4 +3,6 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMIntelJITEvents
IntelJITEventListener.cpp
jitprofiling.c
- )
+
+ LINK_LIBS pthread ${CMAKE_DL_LIBS}
+)
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 907144007fdd..a131763193c0 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -113,63 +113,59 @@ void IntelJITEventListener::NotifyObjectEmitted(
std::vector<LineNumberInfo> LineInfo;
std::string SourceFileName;
- if (Sym.getType() == SymbolRef::ST_Function) {
- ErrorOr<StringRef> Name = Sym.getName();
- if (!Name)
- continue;
-
- uint64_t Addr;
- if (Sym.getAddress(Addr))
- continue;
- uint64_t Size = P.second;
-
- // Record this address in a local vector
- Functions.push_back((void*)Addr);
-
- // Build the function loaded notification message
- iJIT_Method_Load FunctionMessage =
- FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size);
- if (Context) {
- DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
- DILineInfoTable::iterator Begin = Lines.begin();
- DILineInfoTable::iterator End = Lines.end();
- for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
- LineInfo.push_back(DILineInfoToIntelJITFormat((uintptr_t)Addr,
- It->first,
- It->second));
- }
- if (LineInfo.size() == 0) {
- FunctionMessage.source_file_name = 0;
- FunctionMessage.line_number_size = 0;
- FunctionMessage.line_number_table = 0;
- } else {
- // Source line information for the address range is provided as
- // a code offset for the start of the corresponding sub-range and
- // a source line. JIT API treats offsets in LineNumberInfo structures
- // as the end of the corresponding code region. The start of the code
- // is taken from the previous element. Need to shift the elements.
-
- LineNumberInfo last = LineInfo.back();
- last.Offset = FunctionMessage.method_size;
- LineInfo.push_back(last);
- for (size_t i = LineInfo.size() - 2; i > 0; --i)
- LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber;
-
- SourceFileName = Lines.front().second.FileName;
- FunctionMessage.source_file_name = const_cast<char *>(SourceFileName.c_str());
- FunctionMessage.line_number_size = LineInfo.size();
- FunctionMessage.line_number_table = &*LineInfo.begin();
- }
- } else {
- FunctionMessage.source_file_name = 0;
- FunctionMessage.line_number_size = 0;
- FunctionMessage.line_number_table = 0;
- }
-
- Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
- &FunctionMessage);
- MethodIDs[(void*)Addr] = FunctionMessage.method_id;
+ if (Sym.getType() != SymbolRef::ST_Function)
+ continue;
+
+ ErrorOr<StringRef> Name = Sym.getName();
+ if (!Name)
+ continue;
+
+ ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
+ if (AddrOrErr.getError())
+ continue;
+ uint64_t Addr = *AddrOrErr;
+ uint64_t Size = P.second;
+
+ // Record this address in a local vector
+ Functions.push_back((void*)Addr);
+
+ // Build the function loaded notification message
+ iJIT_Method_Load FunctionMessage =
+ FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size);
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ LineInfo.push_back(
+ DILineInfoToIntelJITFormat((uintptr_t)Addr, It->first, It->second));
}
+ if (LineInfo.size() == 0) {
+ FunctionMessage.source_file_name = 0;
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ } else {
+ // Source line information for the address range is provided as
+ // a code offset for the start of the corresponding sub-range and
+ // a source line. JIT API treats offsets in LineNumberInfo structures
+ // as the end of the corresponding code region. The start of the code
+ // is taken from the previous element. Need to shift the elements.
+
+ LineNumberInfo last = LineInfo.back();
+ last.Offset = FunctionMessage.method_size;
+ LineInfo.push_back(last);
+ for (size_t i = LineInfo.size() - 2; i > 0; --i)
+ LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber;
+
+ SourceFileName = Lines.front().second.FileName;
+ FunctionMessage.source_file_name =
+ const_cast<char *>(SourceFileName.c_str());
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ }
+
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[(void*)Addr] = FunctionMessage.method_id;
}
// To support object unload notification, we need to keep a list of
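
The comment kept in the rewritten block above ("Need to shift the elements") encodes a subtle convention: DWARF line entries give the start offset of each sub-range, while the VTune JIT API treats each LineNumberInfo offset as the end of a region. A small standalone sketch of that shift, using an invented Entry struct and made-up offsets, not the real VTune header:

    // Illustration of the offset shift described in the comment above.
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Entry { unsigned Offset; unsigned LineNumber; };

    int main() {
      // DWARF gives (start offset, line); the JIT API wants (end offset, line).
      std::vector<Entry> LineInfo = {{0x00, 10}, {0x10, 11}, {0x24, 12}};
      unsigned MethodSize = 0x40;

      // Duplicate the last entry and cap it at the end of the function...
      Entry Last = LineInfo.back();
      Last.Offset = MethodSize;
      LineInfo.push_back(Last);
      // ...then shift each line number down one slot, so every entry's offset
      // now marks where the previous source line's code ends.
      for (std::size_t I = LineInfo.size() - 2; I > 0; --I)
        LineInfo[I].LineNumber = LineInfo[I - 1].LineNumber;

      for (const Entry &E : LineInfo)
        std::printf("end=0x%02x line=%u\n", E.Offset, E.LineNumber);
      return 0;
    }
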
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
index 1247cbd94930..afea3ecccda4 100644
--- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
@@ -21,4 +21,4 @@
type = OptionalLibrary
name = IntelJITEvents
parent = ExecutionEngine
-required_libraries = Core DebugInfoDWARF Support
+required_libraries = Core DebugInfoDWARF Support Object ExecutionEngine
diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
index e30516eb3b01..7d5550046a56 100644
--- a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
@@ -21,3 +21,4 @@
type = OptionalLibrary
name = OProfileJIT
parent = ExecutionEngine
+required_libraries = Support Object ExecutionEngine
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index b72033805269..324d07118704 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -88,24 +88,27 @@ void OProfileJITEventListener::NotifyObjectEmitted(
// Use symbol info to iterate functions in the object.
for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
SymbolRef Sym = P.first;
- if (Sym.getType() == SymbolRef::ST_Function) {
- StringRef Name;
- uint64_t Addr;
- if (Sym.getName(Name))
- continue;
- if (Sym.getAddress(Addr))
- continue;
- uint64_t Size = P.second;
-
- if (Wrapper->op_write_native_code(Name.data(), Addr, (void*)Addr, Size)
- == -1) {
- DEBUG(dbgs() << "Failed to tell OProfile about native function "
- << Name << " at ["
- << (void*)Addr << "-" << ((char*)Addr + Size) << "]\n");
- continue;
- }
- // TODO: support line number info (similar to IntelJITEventListener.cpp)
+ if (Sym.getType() != SymbolRef::ST_Function)
+ continue;
+
+ ErrorOr<StringRef> NameOrErr = Sym.getName();
+ if (NameOrErr.getError())
+ continue;
+ StringRef Name = *NameOrErr;
+ ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
+ if (AddrOrErr.getError())
+ continue;
+ uint64_t Addr = *AddrOrErr;
+ uint64_t Size = P.second;
+
+ if (Wrapper->op_write_native_code(Name.data(), Addr, (void *)Addr, Size) ==
+ -1) {
+ DEBUG(dbgs() << "Failed to tell OProfile about native function " << Name
+ << " at [" << (void *)Addr << "-" << ((char *)Addr + Size)
+ << "]\n");
+ continue;
}
+ // TODO: support line number info (similar to IntelJITEventListener.cpp)
}
DebugObjects[Obj.getData().data()] = std::move(DebugObjOwner);
@@ -126,8 +129,10 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
E = DebugObj.symbol_end();
I != E; ++I) {
if (I->getType() == SymbolRef::ST_Function) {
- uint64_t Addr;
- if (I->getAddress(Addr)) continue;
+ ErrorOr<uint64_t> AddrOrErr = I->getAddress();
+ if (AddrOrErr.getError())
+ continue;
+ uint64_t Addr = *AddrOrErr;
if (Wrapper->op_unload_native_code(Addr) == -1) {
DEBUG(dbgs()
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index fa501824e04a..93287a3a4e71 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -113,28 +113,12 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
llvm_unreachable("Attempting to remap address of unknown section!");
}
-static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
- uint64_t Address;
- if (std::error_code EC = Sym.getAddress(Address))
+static std::error_code getOffset(const SymbolRef &Sym, SectionRef Sec,
+ uint64_t &Result) {
+ ErrorOr<uint64_t> AddressOrErr = Sym.getAddress();
+ if (std::error_code EC = AddressOrErr.getError())
return EC;
-
- if (Address == UnknownAddress) {
- Result = UnknownAddress;
- return std::error_code();
- }
-
- const ObjectFile *Obj = Sym.getObject();
- section_iterator SecI(Obj->section_begin());
- if (std::error_code EC = Sym.getSection(SecI))
- return EC;
-
- if (SecI == Obj->section_end()) {
- Result = UnknownAddress;
- return std::error_code();
- }
-
- uint64_t SectionAddress = SecI->getAddress();
- Result = Address - SectionAddress;
+ Result = *AddressOrErr - Sec.getAddress();
return std::error_code();
}
@@ -184,12 +168,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
ErrorOr<StringRef> NameOrErr = I->getName();
Check(NameOrErr.getError());
StringRef Name = *NameOrErr;
- uint64_t SectOffset;
- Check(getOffset(*I, SectOffset));
section_iterator SI = Obj.section_end();
Check(I->getSection(SI));
if (SI == Obj.section_end())
continue;
+ uint64_t SectOffset;
+ Check(getOffset(*I, *SI, SectOffset));
StringRef SectionData;
Check(SI->getContents(SectionData));
bool IsCode = SI->isText();
@@ -814,12 +798,16 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
report_fatal_error("Program used external function '" + Name +
"' which could not be resolved!");
- DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t"
- << format("0x%lx", Addr) << "\n");
- // This list may have been updated when we called getSymbolAddress, so
- // don't change this code to get the list earlier.
- RelocationList &Relocs = i->second;
- resolveRelocationList(Relocs, Addr);
+ // If Resolver returned UINT64_MAX, the client wants to handle this symbol
+ // manually and we shouldn't resolve its relocations.
+ if (Addr != UINT64_MAX) {
+ DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t"
+ << format("0x%lx", Addr) << "\n");
+ // This list may have been updated when we called getSymbolAddress, so
+ // don't change this code to get the list earlier.
+ RelocationList &Relocs = i->second;
+ resolveRelocationList(Relocs, Addr);
+ }
}
ExternalSymbolRelocations.erase(i);
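
The new UINT64_MAX check above establishes a small protocol: a resolver may return that sentinel to say "the client will patch this symbol itself", and RuntimeDyld then leaves its relocations untouched. A hedged sketch of the same convention with a hypothetical lookup function, not the real RuntimeDyld resolver interface:

    // Illustration only: lookUpSymbol() and the addresses are invented.
    #include <cstdint>
    #include <iostream>
    #include <string>

    static uint64_t lookUpSymbol(const std::string &Name) {
      if (Name == "client_managed_sym")
        return UINT64_MAX; // sentinel: the caller handles this symbol itself
      return 0x401000;     // pretend resolved address
    }

    int main() {
      for (const char *Name : {"puts", "client_managed_sym"}) {
        uint64_t Addr = lookUpSymbol(Name);
        if (Addr != UINT64_MAX)
          std::cout << Name << ": resolve relocations against 0x"
                    << std::hex << Addr << std::dec << '\n';
        else
          std::cout << Name << ": skipped, left to the client\n";
      }
      return 0;
    }
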
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 957571b092da..ae199b720223 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -727,7 +727,9 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
}
bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
- return getRTDyld().getSymbolLocalAddress(Symbol) != nullptr;
+ if (getRTDyld().getSymbolLocalAddress(Symbol))
+ return true;
+ return !!getRTDyld().Resolver.findSymbol(Symbol);
}
uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index f5069c005857..3787950b3b08 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -511,11 +511,54 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
Insn |= Value & 0xffff;
writeBytesUnaligned(Insn, TargetPtr, 4);
break;
- case ELF::R_MIPS_PC32:
+ case ELF::R_MIPS_PC32: {
+ uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4);
+ break;
+ }
+ case ELF::R_MIPS_PC16: {
+ uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ Insn &= 0xffff0000;
+ Insn |= ((Value - FinalAddress) >> 2) & 0xffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ }
+ case ELF::R_MIPS_PC19_S2: {
+ uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ Insn &= 0xfff80000;
+ Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ }
+ case ELF::R_MIPS_PC21_S2: {
+ uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ Insn &= 0xffe00000;
+ Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ }
+ case ELF::R_MIPS_PC26_S2: {
+ uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ Insn &= 0xfc000000;
+ Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ }
+ case ELF::R_MIPS_PCHI16: {
uint32_t FinalAddress = (Section.LoadAddress + Offset);
- writeBytesUnaligned(Value + Addend - FinalAddress, (uint8_t *)TargetPtr, 4);
+ Insn &= 0xffff0000;
+ Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
break;
}
+ case ELF::R_MIPS_PCLO16: {
+ uint32_t FinalAddress = (Section.LoadAddress + Offset);
+ Insn &= 0xffff0000;
+ Insn |= (Value - FinalAddress) & 0xffff;
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ }
+ }
}
void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) {
@@ -1263,12 +1306,24 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
Section.StubOffset += getMaxStubSize();
}
} else {
- if (RelType == ELF::R_MIPS_HI16)
+ // FIXME: Calculate correct addends for R_MIPS_HI16, R_MIPS_LO16,
+ // R_MIPS_PCHI16 and R_MIPS_PCLO16 relocations.
+ if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16)
Value.Addend += (Opcode & 0x0000ffff) << 16;
else if (RelType == ELF::R_MIPS_LO16)
Value.Addend += (Opcode & 0x0000ffff);
else if (RelType == ELF::R_MIPS_32)
Value.Addend += Opcode;
+ else if (RelType == ELF::R_MIPS_PCLO16)
+ Value.Addend += SignExtend32<16>((Opcode & 0x0000ffff));
+ else if (RelType == ELF::R_MIPS_PC16)
+ Value.Addend += SignExtend32<18>((Opcode & 0x0000ffff) << 2);
+ else if (RelType == ELF::R_MIPS_PC19_S2)
+ Value.Addend += SignExtend32<21>((Opcode & 0x0007ffff) << 2);
+ else if (RelType == ELF::R_MIPS_PC21_S2)
+ Value.Addend += SignExtend32<23>((Opcode & 0x001fffff) << 2);
+ else if (RelType == ELF::R_MIPS_PC26_S2)
+ Value.Addend += SignExtend32<28>((Opcode & 0x03ffffff) << 2);
processSimpleRelocation(SectionID, Offset, RelType, Value);
}
} else if (IsMipsN64ABI) {
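
All of the new MIPS PC-relative cases above follow one recipe: clear the instruction's immediate field, compute Value minus the fixup's final address, scale it by the relocation's implied shift, and mask the result back in. A worked sketch for R_MIPS_PC16 with invented addresses and instruction bits:

    // Worked example of the R_MIPS_PC16 arithmetic used above; the numbers
    // are made up for illustration.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Insn = 0x45000000;         // opcode bits, immediate cleared
      uint32_t FinalAddress = 0x00400010; // run-time address of the fixup
      uint32_t Value = 0x00400050;        // target address (symbol + addend)

      // Branch displacements are counted in 4-byte instructions, so shift the
      // byte distance right by 2 and keep the low 16 bits.
      Insn &= 0xffff0000;
      Insn |= ((Value - FinalAddress) >> 2) & 0xffff;

      std::printf("patched instruction: 0x%08x\n", (unsigned)Insn); // 0x45000010
      return 0;
    }
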
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 74b13d60a984..c0741141757c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -89,19 +89,11 @@ RelocationValueRef RuntimeDyldMachO::getRelocationValueRef(
}
void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value,
- const ObjectFile &BaseTObj,
const relocation_iterator &RI,
unsigned OffsetToNextPC) {
- const MachOObjectFile &Obj =
- static_cast<const MachOObjectFile &>(BaseTObj);
- MachO::any_relocation_info RelInfo =
- Obj.getRelocation(RI->getRawDataRefImpl());
-
- bool IsPCRel = Obj.getAnyRelocationPCRel(RelInfo);
- if (IsPCRel) {
- ErrorOr<uint64_t> RelocAddr = RI->getAddress();
- Value.Offset += *RelocAddr + OffsetToNextPC;
- }
+ auto &O = *cast<MachOObjectFile>(RI->getObject());
+ section_iterator SecI = O.getRelocationRelocatedSection(RI);
+ Value.Offset += RI->getOffset() + OffsetToNextPC + SecI->getAddress();
}
void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 36ba8d1b93e7..0d7364f78597 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -95,7 +95,6 @@ protected:
/// Make the RelocationValueRef addend PC-relative.
void makeValueAddendPCRel(RelocationValueRef &Value,
- const ObjectFile &BaseTObj,
const relocation_iterator &RI,
unsigned OffsetToNextPC);
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 99fd6e333b47..7bf764114bae 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -284,7 +284,7 @@ public:
bool IsExtern = Obj.getPlainRelocationExternal(RelInfo);
if (!IsExtern && RE.IsPCRel)
- makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size);
+ makeValueAddendPCRel(Value, RelI, 1 << RE.Size);
RE.Addend = Value.Offset;
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 0d9445e84f09..0a24bb2f5eae 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -74,7 +74,7 @@ public:
getRelocationValueRef(Obj, RelI, RE, ObjSectionToID));
if (RE.IsPCRel)
- makeValueAddendPCRel(Value, Obj, RelI, 8);
+ makeValueAddendPCRel(Value, RelI, 8);
if ((RE.RelType & 0xf) == MachO::ARM_RELOC_BR24)
processBranchRelocation(RE, Value, Stubs);
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index aceb304abb1e..569a078d7f3d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -68,7 +68,7 @@ public:
// Value.Addend += RelocAddr + 4;
// }
if (RE.IsPCRel)
- makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size);
+ makeValueAddendPCRel(Value, RelI, 1 << RE.Size);
RE.Addend = Value.Offset;
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 4b3b01ba3c96..dd56e72f9144 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -50,7 +50,7 @@ public:
bool IsExtern = Obj.getPlainRelocationExternal(RelInfo);
if (!IsExtern && RE.IsPCRel)
- makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size);
+ makeValueAddendPCRel(Value, RelI, 1 << RE.Size);
if (RE.RelType == MachO::X86_64_RELOC_GOT ||
RE.RelType == MachO::X86_64_RELOC_GOT_LOAD)
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index c3032f4ffc79..546a98670a29 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -190,6 +190,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "sanitize_address";
if (hasAttribute(Attribute::AlwaysInline))
return "alwaysinline";
+ if (hasAttribute(Attribute::ArgMemOnly))
+ return "argmemonly";
if (hasAttribute(Attribute::Builtin))
return "builtin";
if (hasAttribute(Attribute::ByVal))
@@ -447,6 +449,9 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
llvm_unreachable("dereferenceable_or_null attribute not supported in raw "
"format");
break;
+ case Attribute::ArgMemOnly:
+ llvm_unreachable("argmemonly attribute not supported in raw format");
+ break;
}
llvm_unreachable("Unsupported attribute type");
}
@@ -1356,7 +1361,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
I = Attribute::AttrKind(I + 1)) {
if (I == Attribute::Dereferenceable ||
- I == Attribute::DereferenceableOrNull)
+ I == Attribute::DereferenceableOrNull ||
+ I == Attribute::ArgMemOnly)
continue;
if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) {
Attrs[I] = true;
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 70a55186ea9a..f1c6ebd4846e 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -229,6 +229,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
NewFn = nullptr;
bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
+ assert(F != NewFn && "Intrinsic function upgraded to the same function");
// Upgrade intrinsic attributes. This does not change the function.
if (NewFn)
@@ -710,16 +711,14 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
// Upgrade the function and check if it is a totally new function.
Function *NewFn;
if (UpgradeIntrinsicFunction(F, NewFn)) {
- if (NewFn != F) {
- // Replace all uses to the old function with the new one if necessary.
- for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
- UI != UE; ) {
- if (CallInst *CI = dyn_cast<CallInst>(*UI++))
- UpgradeIntrinsicCall(CI, NewFn);
- }
- // Remove old function, no longer used, from the module.
- F->eraseFromParent();
+ // Replace all uses to the old function with the new one if necessary.
+ for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
+ UI != UE;) {
+ if (CallInst *CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, NewFn);
}
+ // Remove old function, no longer used, from the module.
+ F->eraseFromParent();
}
}
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 77cb10d5b6ba..0a0449434a7b 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -163,47 +163,40 @@ CallInst *BasicBlock::getTerminatingMustTailCall() {
}
Instruction* BasicBlock::getFirstNonPHI() {
- BasicBlock::iterator i = begin();
- // All valid basic blocks should have a terminator,
- // which is not a PHINode. If we have an invalid basic
- // block we'll get an assertion failure when dereferencing
- // a past-the-end iterator.
- while (isa<PHINode>(i)) ++i;
- return &*i;
+ for (Instruction &I : *this)
+ if (!isa<PHINode>(I))
+ return &I;
+ return nullptr;
}
Instruction* BasicBlock::getFirstNonPHIOrDbg() {
- BasicBlock::iterator i = begin();
- // All valid basic blocks should have a terminator,
- // which is not a PHINode. If we have an invalid basic
- // block we'll get an assertion failure when dereferencing
- // a past-the-end iterator.
- while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i;
- return &*i;
+ for (Instruction &I : *this)
+ if (!isa<PHINode>(I) && !isa<DbgInfoIntrinsic>(I))
+ return &I;
+ return nullptr;
}
Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
- // All valid basic blocks should have a terminator,
- // which is not a PHINode. If we have an invalid basic
- // block we'll get an assertion failure when dereferencing
- // a past-the-end iterator.
- BasicBlock::iterator i = begin();
- for (;; ++i) {
- if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i))
+ for (Instruction &I : *this) {
+ if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
continue;
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i);
- if (!II)
- break;
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
- break;
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+
+ return &I;
}
- return &*i;
+ return nullptr;
}
BasicBlock::iterator BasicBlock::getFirstInsertionPt() {
- iterator InsertPt = getFirstNonPHI();
+ Instruction *FirstNonPHI = getFirstNonPHI();
+ if (!FirstNonPHI)
+ return end();
+
+ iterator InsertPt = FirstNonPHI;
if (isa<LandingPadInst>(InsertPt)) ++InsertPt;
return InsertPt;
}
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 23e923d41126..e0e729d534bd 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -1691,6 +1691,14 @@ void LLVMDeleteFunction(LLVMValueRef Fn) {
unwrap<Function>(Fn)->eraseFromParent();
}
+LLVMValueRef LLVMGetPersonalityFn(LLVMValueRef Fn) {
+ return wrap(unwrap<Function>(Fn)->getPersonalityFn());
+}
+
+void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn) {
+ unwrap<Function>(Fn)->setPersonalityFn(unwrap<Constant>(PersonalityFn));
+}
+
unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
if (Function *F = dyn_cast<Function>(unwrap(Fn)))
return F->getIntrinsicID();
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 6a3ff0e8e457..2a90e70af1a3 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -73,37 +73,47 @@ void DIBuilder::trackIfUnresolved(MDNode *N) {
}
void DIBuilder::finalize() {
- if (CUNode) {
- CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes));
-
- SmallVector<Metadata *, 16> RetainValues;
- // Declarations and definitions of the same type may be retained. Some
- // clients RAUW these pairs, leaving duplicates in the retained types
- // list. Use a set to remove the duplicates while we transform the
- // TrackingVHs back into Values.
- SmallPtrSet<Metadata *, 16> RetainSet;
- for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
- if (RetainSet.insert(AllRetainTypes[I]).second)
- RetainValues.push_back(AllRetainTypes[I]);
+ if (!CUNode) {
+ assert(!AllowUnresolvedNodes &&
+ "creating type nodes without a CU is not supported");
+ return;
+ }
+
+ CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes));
+
+ SmallVector<Metadata *, 16> RetainValues;
+ // Declarations and definitions of the same type may be retained. Some
+ // clients RAUW these pairs, leaving duplicates in the retained types
+ // list. Use a set to remove the duplicates while we transform the
+ // TrackingVHs back into Values.
+ SmallPtrSet<Metadata *, 16> RetainSet;
+ for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
+ if (RetainSet.insert(AllRetainTypes[I]).second)
+ RetainValues.push_back(AllRetainTypes[I]);
+
+ if (!RetainValues.empty())
CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues));
- DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
+ DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
+ if (!AllSubprograms.empty())
CUNode->replaceSubprograms(SPs.get());
- for (auto *SP : SPs) {
- if (MDTuple *Temp = SP->getVariables().get()) {
- const auto &PV = PreservedVariables.lookup(SP);
- SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end());
- DINodeArray AV = getOrCreateArray(Variables);
- TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
- }
+
+ for (auto *SP : SPs) {
+ if (MDTuple *Temp = SP->getVariables().get()) {
+ const auto &PV = PreservedVariables.lookup(SP);
+ SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end());
+ DINodeArray AV = getOrCreateArray(Variables);
+ TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
}
+ }
+ if (!AllGVs.empty())
CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs));
+ if (!AllImportedModules.empty())
CUNode->replaceImportedEntities(MDTuple::get(
VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(),
AllImportedModules.end())));
- }
// Now that all temp nodes have been replaced or deleted, resolve remaining
// cycles.
@@ -585,7 +595,7 @@ DILocalVariable *DIBuilder::createLocalVariable(
DIType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) {
// FIXME: Why getNonCompileUnitScope()?
// FIXME: Why is "!Context" okay here?
- // FIXME: WHy doesn't this check for a subprogram or lexical block (AFAICT
+ // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
// the only valid scopes)?
DIScope *Context = getNonCompileUnitScope(Scope);
@@ -593,7 +603,7 @@ DILocalVariable *DIBuilder::createLocalVariable(
VMContext, Tag, cast_or_null<DILocalScope>(Context), Name, File, LineNo,
DITypeRef::get(Ty), ArgNo, Flags);
if (AlwaysPreserve) {
- // The optimizer may remove local variable. If there is an interest
+ // The optimizer may remove local variables. If there is an interest
// to preserve variable info in such situation then stash it in a
// named mdnode.
DISubprogram *Fn = getDISubprogram(Scope);
@@ -857,7 +867,7 @@ void DIBuilder::replaceArrays(DICompositeType *&T, DINodeArray Elements,
if (!T->isResolved())
return;
- // If "T" is resolved, it may be due to a self-reference cycle. Track the
+ // If T is resolved, it may be due to a self-reference cycle. Track the
// arrays explicitly if they're unresolved, or else the cycles will be
// orphaned.
if (Elements)
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index e3258895ea5e..b6a8bbcbe5fa 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -62,18 +62,14 @@ bool BasicBlockEdge::isSingleEdge() const {
//
//===----------------------------------------------------------------------===//
-TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
-TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
-
-#define LLVM_COMMA ,
-TEMPLATE_INSTANTIATION(void llvm::Calculate<Function LLVM_COMMA BasicBlock *>(
- DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT LLVM_COMMA
- Function &F));
-TEMPLATE_INSTANTIATION(
- void llvm::Calculate<Function LLVM_COMMA Inverse<BasicBlock *> >(
- DominatorTreeBase<GraphTraits<Inverse<BasicBlock *> >::NodeType> &DT
- LLVM_COMMA Function &F));
-#undef LLVM_COMMA
+template class llvm::DomTreeNodeBase<BasicBlock>;
+template class llvm::DominatorTreeBase<BasicBlock>;
+
+template void llvm::Calculate<Function, BasicBlock *>(
+ DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT, Function &F);
+template void llvm::Calculate<Function, Inverse<BasicBlock *>>(
+ DominatorTreeBase<GraphTraits<Inverse<BasicBlock *>>::NodeType> &DT,
+ Function &F);
// dominates - Return true if Def dominates a use in User. This performs
// the special checks necessary if Def and User are in the same basic block.
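
The Dominators.cpp hunk above replaces the old TEMPLATE_INSTANTIATION macro with plain explicit instantiation, which is all the macro ever expanded to. For readers unfamiliar with the construct, a generic sketch; Stack<int> is illustrative only, not LLVM code:

    // Explicit instantiation definition: emit Stack<int>'s members into this
    // translation unit so other TUs can link against them without seeing the
    // member bodies.
    #include <vector>

    template <typename T> class Stack {
      std::vector<T> Data;
    public:
      void push(const T &V) { Data.push_back(V); }
      T pop() { T V = Data.back(); Data.pop_back(); return V; }
    };

    template class Stack<int>;
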
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 78d1adb5e700..f554d590284f 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -39,8 +39,6 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// Value Class
//===----------------------------------------------------------------------===//
-const unsigned Value::NumUserOperandsBits;
-
static inline Type *checkType(Type *Ty) {
assert(Ty && "Value defined with a null type: Error!");
return Ty;
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 3c61165768f8..2a0a4ff393ed 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -184,12 +184,12 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// \brief Track unresolved string-based type references.
SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs;
- /// \brief Whether we've seen a call to @llvm.frameescape in this function
+ /// \brief Whether we've seen a call to @llvm.localescape in this function
/// already.
bool SawFrameEscape;
- /// Stores the count of how many objects were passed to llvm.frameescape for a
- /// given function and the largest index passed to llvm.framerecover.
+ /// Stores the count of how many objects were passed to llvm.localescape for a
+ /// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
public:
@@ -438,6 +438,9 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
Assert(GVar && GVar->getValueType()->isArrayTy(),
"Only global arrays can have appending linkage!", GVar);
}
+
+ if (GV.isDeclarationForLinker())
+ Assert(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV);
}
void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
@@ -1270,7 +1273,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
I->getKindAsEnum() == Attribute::Cold ||
I->getKindAsEnum() == Attribute::OptimizeNone ||
I->getKindAsEnum() == Attribute::JumpTable ||
- I->getKindAsEnum() == Attribute::Convergent) {
+ I->getKindAsEnum() == Attribute::Convergent ||
+ I->getKindAsEnum() == Attribute::ArgMemOnly) {
if (!isFunction) {
CheckFailed("Attribute '" + I->getAsString() +
"' only applies to functions!", V);
@@ -1528,8 +1532,9 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
const Instruction &CI = *CS.getInstruction();
- Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory(),
- "gc.statepoint must read and write memory to preserve "
+ Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory() &&
+ !CS.onlyAccessesArgMemory(),
+ "gc.statepoint must read and write all memory to preserve "
"reordering restrictions required by safepoint semantics",
&CI);
@@ -1666,8 +1671,8 @@ void Verifier::verifyFrameRecoverIndices() {
unsigned EscapedObjectCount = Counts.second.first;
unsigned MaxRecoveredIndex = Counts.second.second;
Assert(MaxRecoveredIndex <= EscapedObjectCount,
- "all indices passed to llvm.framerecover must be less than the "
- "number of arguments passed ot llvm.frameescape in the parent "
+ "all indices passed to llvm.localrecover must be less than the "
+ "number of arguments passed ot llvm.localescape in the parent "
"function",
F);
}
@@ -2535,10 +2540,6 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Assert(isa<PointerType>(TargetTy),
"GEP base pointer is not a vector or a vector of pointers", &GEP);
Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
- Assert(GEP.getPointerOperandType()->isVectorTy() ==
- GEP.getType()->isVectorTy(),
- "Vector GEP must return a vector value", &GEP);
-
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
@@ -2548,17 +2549,20 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GEP.getResultElementType() == ElTy,
"GEP is not of right type for indices!", &GEP, ElTy);
- if (GEP.getPointerOperandType()->isVectorTy()) {
+ if (GEP.getType()->isVectorTy()) {
// Additional checks for vector GEPs.
- unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
- Assert(GepWidth == GEP.getType()->getVectorNumElements(),
- "Vector GEP result width doesn't match operand's", &GEP);
+ unsigned GEPWidth = GEP.getType()->getVectorNumElements();
+ if (GEP.getPointerOperandType()->isVectorTy())
+ Assert(GEPWidth == GEP.getPointerOperandType()->getVectorNumElements(),
+ "Vector GEP result width doesn't match operand's", &GEP);
for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
Type *IndexTy = Idxs[i]->getType();
- Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!",
- &GEP);
- unsigned IndexWidth = IndexTy->getVectorNumElements();
- Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ if (IndexTy->isVectorTy()) {
+ unsigned IndexWidth = IndexTy->getVectorNumElements();
+ Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP);
+ }
+ Assert(IndexTy->getScalarType()->isIntegerTy(),
+ "All GEP indices should be of integer type");
}
}
visitInstruction(GEP);
@@ -3276,32 +3280,32 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"llvm.invariant.end parameter #2 must be a constant integer", CS);
break;
- case Intrinsic::frameescape: {
+ case Intrinsic::localescape: {
BasicBlock *BB = CS.getParent();
Assert(BB == &BB->getParent()->front(),
- "llvm.frameescape used outside of entry block", CS);
+ "llvm.localescape used outside of entry block", CS);
Assert(!SawFrameEscape,
- "multiple calls to llvm.frameescape in one function", CS);
+ "multiple calls to llvm.localescape in one function", CS);
for (Value *Arg : CS.args()) {
if (isa<ConstantPointerNull>(Arg))
continue; // Null values are allowed as placeholders.
auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
Assert(AI && AI->isStaticAlloca(),
- "llvm.frameescape only accepts static allocas", CS);
+ "llvm.localescape only accepts static allocas", CS);
}
FrameEscapeInfo[BB->getParent()].first = CS.getNumArgOperands();
SawFrameEscape = true;
break;
}
- case Intrinsic::framerecover: {
+ case Intrinsic::localrecover: {
Value *FnArg = CS.getArgOperand(0)->stripPointerCasts();
Function *Fn = dyn_cast<Function>(FnArg);
Assert(Fn && !Fn->isDeclaration(),
- "llvm.framerecover first "
+ "llvm.localrecover first "
"argument must be function defined in this module",
CS);
auto *IdxArg = dyn_cast<ConstantInt>(CS.getArgOperand(2));
- Assert(IdxArg, "idx argument of llvm.framerecover must be a constant int",
+ Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int",
CS);
auto &Entry = FrameEscapeInfo[Fn];
Entry.second = unsigned(
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 6131c3180249..53ed4175f8e3 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -473,6 +473,9 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
if (def->hasComdat())
attr |= LTO_SYMBOL_COMDAT;
+ if (isa<GlobalAlias>(def))
+ attr |= LTO_SYMBOL_ALIAS;
+
auto Iter = _defines.insert(Name).first;
// fill information structure
diff --git a/lib/LibDriver/LibDriver.cpp b/lib/LibDriver/LibDriver.cpp
index cb3278c716e6..b33a22ff0cf8 100644
--- a/lib/LibDriver/LibDriver.cpp
+++ b/lib/LibDriver/LibDriver.cpp
@@ -56,17 +56,13 @@ public:
}
-static std::string getOutputPath(llvm::opt::InputArgList *Args) {
+static std::string getOutputPath(llvm::opt::InputArgList *Args,
+ const llvm::NewArchiveIterator &FirstMember) {
if (auto *Arg = Args->getLastArg(OPT_out))
return Arg->getValue();
- for (auto *Arg : Args->filtered(OPT_INPUT)) {
- if (!StringRef(Arg->getValue()).endswith_lower(".obj"))
- continue;
- SmallString<128> Val = StringRef(Arg->getValue());
- llvm::sys::path::replace_extension(Val, ".lib");
- return Val.str();
- }
- llvm_unreachable("internal error");
+ SmallString<128> Val = FirstMember.getNew();
+ llvm::sys::path::replace_extension(Val, ".lib");
+ return Val.str();
}
static std::vector<StringRef> getSearchPaths(llvm::opt::InputArgList *Args,
@@ -144,7 +140,10 @@ int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) {
}
std::pair<StringRef, std::error_code> Result =
- llvm::writeArchive(getOutputPath(&Args), Members, /*WriteSymtab=*/true);
+ llvm::writeArchive(getOutputPath(&Args, Members[0]), Members,
+ /*WriteSymtab=*/true, object::Archive::K_GNU,
+ /*Deterministic*/ true);
+
if (Result.second) {
if (Result.first.empty())
Result.first = ArgsArr[0];
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 13c5ca9561df..6554d6a9e60e 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(LLVMMC
MCObjectStreamer.cpp
MCObjectWriter.cpp
MCRegisterInfo.cpp
+ MCSchedule.cpp
MCSection.cpp
MCSectionCOFF.cpp
MCSectionELF.cpp
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 9a65a3158972..227c937e8d1b 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -503,7 +503,8 @@ void MCAsmStreamer::EndCOFFSymbolDef() {
}
void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
- OS << "\t.safeseh\t" << *Symbol;
+ OS << "\t.safeseh\t";
+ Symbol->print(OS, MAI);
EmitEOL();
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index da6516a4ac92..f53b589e1aea 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -925,7 +925,7 @@ void MCAssembler::Finish() {
Fixups = FragWithFixups->getFixups();
Contents = FragWithFixups->getContents();
} else
- llvm_unreachable("Unknow fragment with fixups!");
+ llvm_unreachable("Unknown fragment with fixups!");
for (const MCFixup &Fixup : Fixups) {
uint64_t FixedValue;
bool IsPCRel;
diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 68948d36d65c..5fc2ca44f5d4 100644
--- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -16,6 +16,10 @@
using namespace llvm;
+namespace llvm {
+class Triple;
+}
+
// This function tries to add a symbolic operand in place of the immediate
// Value in the MCInst. The immediate Value has had any PC adjustment made by
// the caller. If the instruction is a branch instruction then IsBranch is true,
@@ -184,7 +188,7 @@ void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
}
namespace llvm {
-MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
+MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp,
void *DisInfo, MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &&RelInfo) {
diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp
index decc2d84b252..5be2fa1b30b6 100644
--- a/lib/MC/MCInstrDesc.cpp
+++ b/lib/MC/MCInstrDesc.cpp
@@ -19,7 +19,7 @@
using namespace llvm;
-bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, MCSubtargetInfo &STI,
+bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) const {
if (ComplexDeprecationInfo)
return ComplexDeprecationInfo(MI, STI, Info);
diff --git a/lib/MC/MCSchedule.cpp b/lib/MC/MCSchedule.cpp
new file mode 100644
index 000000000000..f3919427bf05
--- /dev/null
+++ b/lib/MC/MCSchedule.cpp
@@ -0,0 +1,34 @@
+//===- MCSchedule.cpp - Scheduling ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default scheduling model.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSchedule.h"
+#include <type_traits>
+
+using namespace llvm;
+
+static_assert(std::is_pod<MCSchedModel>::value,
+ "We shouldn't have a static constructor here");
+const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
+ DefaultMicroOpBufferSize,
+ DefaultLoopMicroOpBufferSize,
+ DefaultLoadLatency,
+ DefaultHighLatency,
+ DefaultMispredictPenalty,
+ false,
+ true,
+ 0,
+ nullptr,
+ nullptr,
+ 0,
+ 0,
+ nullptr};
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index ece775c4f08f..9210cf544b16 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -17,42 +17,34 @@
using namespace llvm;
-/// InitMCProcessorInfo - Set or change the CPU (optionally supplemented
-/// with feature string). Recompute feature bits and scheduling model.
-void
-MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
+static FeatureBitset getFeatures(StringRef CPU, StringRef FS,
+ ArrayRef<SubtargetFeatureKV> ProcDesc,
+ ArrayRef<SubtargetFeatureKV> ProcFeatures) {
SubtargetFeatures Features(FS);
- FeatureBits = Features.getFeatureBits(CPU, ProcDesc, ProcFeatures);
- InitCPUSchedModel(CPU);
+ return Features.getFeatureBits(CPU, ProcDesc, ProcFeatures);
}
-void
-MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) {
+void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
+ FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures);
if (!CPU.empty())
- CPUSchedModel = getSchedModelForCPU(CPU);
+ CPUSchedModel = &getSchedModelForCPU(CPU);
else
- CPUSchedModel = MCSchedModel::GetDefaultSchedModel();
+ CPUSchedModel = &MCSchedModel::GetDefaultSchedModel();
}
-void MCSubtargetInfo::InitMCSubtargetInfo(
+void MCSubtargetInfo::setDefaultFeatures(StringRef CPU) {
+ FeatureBits = getFeatures(CPU, "", ProcDesc, ProcFeatures);
+}
+
+MCSubtargetInfo::MCSubtargetInfo(
const Triple &TT, StringRef C, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
- const InstrStage *IS, const unsigned *OC, const unsigned *FP) {
- TargetTriple = TT;
- CPU = C;
- ProcFeatures = PF;
- ProcDesc = PD;
- ProcSchedModels = ProcSched;
- WriteProcResTable = WPR;
- WriteLatencyTable = WL;
- ReadAdvanceTable = RA;
-
- Stages = IS;
- OperandCycles = OC;
- ForwardingPaths = FP;
-
+ const InstrStage *IS, const unsigned *OC, const unsigned *FP)
+ : TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD),
+ ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL),
+ ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) {
InitMCProcessorInfo(CPU, FS);
}
@@ -82,8 +74,7 @@ FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
return FeatureBits;
}
-MCSchedModel
-MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
+const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
assert(ProcSchedModels && "Processor machine model not available!");
unsigned NumProcs = ProcDesc.size();
@@ -116,6 +107,6 @@ MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
/// Initialize an InstrItineraryData instance.
void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const {
- InstrItins =
- InstrItineraryData(CPUSchedModel, Stages, OperandCycles, ForwardingPaths);
+ InstrItins = InstrItineraryData(getSchedModel(), Stages, OperandCycles,
+ ForwardingPaths);
}
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index affc57471fdb..125380a9d140 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -19,9 +19,6 @@ using namespace llvm;
// Sentinel value for the absolute pseudo section.
MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1);
-const unsigned MCSymbol::NumCommonAlignmentBits;
-const unsigned MCSymbol::NumFlagsBits;
-
void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name,
MCContext &Ctx) {
// We may need more space for a Name to account for alignment. So allocate
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 54ed954a90d9..d4821196a6cf 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
using namespace llvm;
using namespace object;
@@ -115,6 +116,23 @@ uint64_t Archive::Child::getRawSize() const {
return getHeader()->getSize();
}
+ErrorOr<StringRef> Archive::Child::getBuffer() const {
+ if (!Parent->IsThin)
+ return StringRef(Data.data() + StartOfFile, getSize());
+ ErrorOr<StringRef> Name = getName();
+ if (std::error_code EC = Name.getError())
+ return EC;
+ SmallString<128> FullName =
+ Parent->getMemoryBufferRef().getBufferIdentifier();
+ sys::path::remove_filename(FullName);
+ sys::path::append(FullName, *Name);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
+ if (std::error_code EC = Buf.getError())
+ return EC;
+ Parent->ThinBuffers.push_back(std::move(*Buf));
+ return Parent->ThinBuffers.back()->getBuffer();
+}
+
Archive::Child Archive::Child::getNext() const {
size_t SpaceToSkip = Data.size();
// If it's odd, add 1 to make it even.
@@ -162,10 +180,10 @@ ErrorOr<StringRef> Archive::Child::getName() const {
+ Parent->StringTable->getSize()))
return object_error::parse_failed;
- // GNU long file names end with a /.
+ // GNU long file names end with a "/\n".
if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
- StringRef::size_type End = StringRef(addr).find('/');
- return StringRef(addr, End);
+ StringRef::size_type End = StringRef(addr).find('\n');
+ return StringRef(addr, End - 1);
}
return StringRef(addr);
} else if (name.startswith("#1/")) {
@@ -186,7 +204,10 @@ ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
if (std::error_code EC = NameOrErr.getError())
return EC;
StringRef Name = NameOrErr.get();
- return MemoryBufferRef(getBuffer(), Name);
+ ErrorOr<StringRef> Buf = getBuffer();
+ if (std::error_code EC = Buf.getError())
+ return EC;
+ return MemoryBufferRef(*Buf, Name);
}
ErrorOr<std::unique_ptr<Binary>>
@@ -207,7 +228,8 @@ ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
}
Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
- : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) {
+ : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()),
+ StringTable(child_end()), FirstRegular(child_end()) {
StringRef Buffer = Data.getBuffer();
// Check for sufficient magic.
if (Buffer.startswith(ThinMagic)) {
@@ -287,7 +309,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
++i;
if (i == e) {
- ec = object_error::parse_failed;
+ ec = std::error_code();
return;
}
Name = i->getRawName();
@@ -352,11 +374,11 @@ Archive::child_iterator Archive::child_end() const {
}
StringRef Archive::Symbol::getName() const {
- return Parent->SymbolTable->getBuffer().begin() + StringIndex;
+ return Parent->getSymbolTable().begin() + StringIndex;
}
ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
- const char *Buf = Parent->SymbolTable->getBuffer().begin();
+ const char *Buf = Parent->getSymbolTable().begin();
const char *Offsets = Buf;
if (Parent->kind() == K_MIPS64)
Offsets += sizeof(uint64_t);
@@ -420,7 +442,7 @@ Archive::Symbol Archive::Symbol::getNext() const {
// and the second being the offset into the archive of the member that
// defines the symbol. After that the next uint32_t is the byte count of
// the string table followed by the string table.
- const char *Buf = Parent->SymbolTable->getBuffer().begin();
+ const char *Buf = Parent->getSymbolTable().begin();
uint32_t RanlibCount = 0;
RanlibCount = read32le(Buf) / 8;
// If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
@@ -437,8 +459,7 @@ Archive::Symbol Archive::Symbol::getNext() const {
}
} else {
// Go to one past next null.
- t.StringIndex =
- Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
+ t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
}
++t.SymbolIndex;
return t;
@@ -448,7 +469,7 @@ Archive::symbol_iterator Archive::symbol_begin() const {
if (!hasSymbolTable())
return symbol_iterator(Symbol(this, 0, 0));
- const char *buf = SymbolTable->getBuffer().begin();
+ const char *buf = getSymbolTable().begin();
if (kind() == K_GNU) {
uint32_t symbol_count = 0;
symbol_count = read32be(buf);
@@ -480,7 +501,7 @@ Archive::symbol_iterator Archive::symbol_begin() const {
symbol_count = read32le(buf);
buf += 4 + (symbol_count * 2); // Skip indices.
}
- uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
+ uint32_t string_start_offset = buf - getSymbolTable().begin();
return symbol_iterator(Symbol(this, 0, string_start_offset));
}
@@ -491,7 +512,7 @@ Archive::symbol_iterator Archive::symbol_end() const {
}
uint32_t Archive::getNumberOfSymbols() const {
- const char *buf = SymbolTable->getBuffer().begin();
+ const char *buf = getSymbolTable().begin();
if (kind() == K_GNU)
return read32be(buf);
if (kind() == K_MIPS64)
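
The new Archive::Child::getBuffer above resolves a thin-archive member by splicing the member's name onto the directory of the archive itself. A minimal sketch of that path manipulation with llvm::sys::path, using invented paths and assuming the LLVM support library is linked in:

    // Sketch of the thin-archive path construction shown above.
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::SmallString<128> FullName("build/lib/libfoo.a"); // archive path
      llvm::sys::path::remove_filename(FullName);            // "build/lib"
      llvm::sys::path::append(FullName, "bar.o");            // "build/lib/bar.o"
      llvm::outs() << FullName << "\n";
      return 0;
    }
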
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index 00a56d13bfed..a40901c924ea 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -34,8 +34,6 @@
using namespace llvm;
-NewArchiveIterator::NewArchiveIterator() {}
-
NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I,
StringRef Name)
: IsNewMember(false), Name(Name), OldI(I) {}
@@ -93,8 +91,12 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size,
}
}
-static void print32BE(raw_ostream &Out, uint32_t Val) {
- support::endian::Writer<support::big>(Out).write(Val);
+static void print32(raw_ostream &Out, object::Archive::Kind Kind,
+ uint32_t Val) {
+ if (Kind == object::Archive::K_GNU)
+ support::endian::Writer<support::big>(Out).write(Val);
+ else
+ support::endian::Writer<support::little>(Out).write(Val);
}
static void printRestOfMemberHeader(raw_fd_ostream &Out,
@@ -109,18 +111,42 @@ static void printRestOfMemberHeader(raw_fd_ostream &Out,
Out << "`\n";
}
-static void printMemberHeader(raw_fd_ostream &Out, StringRef Name,
- const sys::TimeValue &ModTime, unsigned UID,
- unsigned GID, unsigned Perms, unsigned Size) {
+static void printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name,
+ const sys::TimeValue &ModTime,
+ unsigned UID, unsigned GID,
+ unsigned Perms, unsigned Size) {
printWithSpacePadding(Out, Twine(Name) + "/", 16);
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
}
-static void printMemberHeader(raw_fd_ostream &Out, unsigned NameOffset,
- const sys::TimeValue &ModTime, unsigned UID,
- unsigned GID, unsigned Perms, unsigned Size) {
+static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name,
+ const sys::TimeValue &ModTime, unsigned UID,
+ unsigned GID, unsigned Perms, unsigned Size) {
+ uint64_t PosAfterHeader = Out.tell() + 60 + Name.size();
+ // Pad so that even 64 bit object files are aligned.
+ unsigned Pad = OffsetToAlignment(PosAfterHeader, 8);
+ unsigned NameWithPadding = Name.size() + Pad;
+ printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16);
+ printRestOfMemberHeader(Out, ModTime, UID, GID, Perms,
+ NameWithPadding + Size);
+ Out << Name;
+ assert(PosAfterHeader == Out.tell());
+ while (Pad--)
+ Out.write(uint8_t(0));
+}
+
+static void
+printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind,
+ StringRef Name,
+ std::vector<unsigned>::iterator &StringMapIndexIter,
+ const sys::TimeValue &ModTime, unsigned UID, unsigned GID,
+ unsigned Perms, unsigned Size) {
+ if (Kind == object::Archive::K_BSD)
+ return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
+ if (Name.size() < 16)
+ return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
Out << '/';
- printWithSpacePadding(Out, NameOffset, 15);
+ printWithSpacePadding(Out, *StringMapIndexIter++, 15);
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
}
@@ -152,20 +178,26 @@ static void writeStringTable(raw_fd_ostream &Out,
Out.seek(Pos);
}
+static sys::TimeValue now(bool Deterministic) {
+ if (!Deterministic)
+ return sys::TimeValue::now();
+ sys::TimeValue TV;
+ TV.fromEpochTime(0);
+ return TV;
+}
+
// Returns the offset of the first reference to a member offset.
static ErrorOr<unsigned>
-writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members,
+writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
+ ArrayRef<NewArchiveIterator> Members,
ArrayRef<MemoryBufferRef> Buffers,
- std::vector<unsigned> &MemberOffsetRefs) {
- unsigned StartOffset = 0;
- unsigned MemberNum = 0;
- std::string NameBuf;
- raw_string_ostream NameOS(NameBuf);
- unsigned NumSyms = 0;
+ std::vector<unsigned> &MemberOffsetRefs, bool Deterministic) {
+ unsigned HeaderStartOffset = 0;
+ unsigned BodyStartOffset = 0;
+ SmallString<128> NameBuf;
+ raw_svector_ostream NameOS(NameBuf);
LLVMContext Context;
- for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(),
- E = Members.end();
- I != E; ++I, ++MemberNum) {
+ for (unsigned MemberNum = 0, N = Members.size(); MemberNum < N; ++MemberNum) {
MemoryBufferRef MemberBuffer = Buffers[MemberNum];
ErrorOr<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
object::SymbolicFile::createSymbolicFile(
@@ -174,10 +206,14 @@ writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members,
continue; // FIXME: check only for "not an object file" errors.
object::SymbolicFile &Obj = *ObjOrErr.get();
- if (!StartOffset) {
- printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0);
- StartOffset = Out.tell();
- print32BE(Out, 0);
+ if (!HeaderStartOffset) {
+ HeaderStartOffset = Out.tell();
+ if (Kind == object::Archive::K_GNU)
+ printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0);
+ else
+ printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0);
+ BodyStartOffset = Out.tell();
+ print32(Out, Kind, 0); // number of entries or bytes
}
for (const object::BasicSymbolRef &S : Obj.symbols()) {
@@ -188,35 +224,53 @@ writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members,
continue;
if (Symflags & object::SymbolRef::SF_Undefined)
continue;
+
+ unsigned NameOffset = NameOS.tell();
if (auto EC = S.printName(NameOS))
return EC;
NameOS << '\0';
- ++NumSyms;
MemberOffsetRefs.push_back(MemberNum);
- print32BE(Out, 0);
+ if (Kind == object::Archive::K_BSD)
+ print32(Out, Kind, NameOffset);
+ print32(Out, Kind, 0); // member offset
}
}
- Out << NameOS.str();
- if (StartOffset == 0)
+ if (HeaderStartOffset == 0)
return 0;
- if (Out.tell() % 2)
- Out << '\0';
+ StringRef StringTable = NameOS.str();
+ if (Kind == object::Archive::K_BSD)
+ print32(Out, Kind, StringTable.size()); // byte count of the string table
+ Out << StringTable;
+
+ // ld64 requires the next member header to start at an offset that is
+ // 4 bytes aligned.
+ unsigned Pad = OffsetToAlignment(Out.tell(), 4);
+ while (Pad--)
+ Out.write(uint8_t(0));
+ // Patch up the size of the symbol table now that we know how big it is.
unsigned Pos = Out.tell();
- Out.seek(StartOffset - 12);
- printWithSpacePadding(Out, Pos - StartOffset, 10);
- Out.seek(StartOffset);
- print32BE(Out, NumSyms);
+ const unsigned MemberHeaderSize = 60;
+ Out.seek(HeaderStartOffset + 48); // offset of the size field.
+ printWithSpacePadding(Out, Pos - MemberHeaderSize - HeaderStartOffset, 10);
+
+ // Patch up the number of symbols.
+ Out.seek(BodyStartOffset);
+ unsigned NumSyms = MemberOffsetRefs.size();
+ if (Kind == object::Archive::K_GNU)
+ print32(Out, Kind, NumSyms);
+ else
+ print32(Out, Kind, NumSyms * 8);
+
Out.seek(Pos);
- return StartOffset + 4;
+ return BodyStartOffset + 4;
}
-std::pair<StringRef, std::error_code>
-llvm::writeArchive(StringRef ArcName,
- std::vector<NewArchiveIterator> &NewMembers,
- bool WriteSymtab) {
+std::pair<StringRef, std::error_code> llvm::writeArchive(
+ StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers,
+ bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic) {
SmallString<128> TmpArchive;
int TmpArchiveFD;
if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a",
@@ -267,56 +321,60 @@ llvm::writeArchive(StringRef ArcName,
unsigned MemberReferenceOffset = 0;
if (WriteSymtab) {
- ErrorOr<unsigned> MemberReferenceOffsetOrErr =
- writeSymbolTable(Out, NewMembers, Members, MemberOffsetRefs);
+ ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable(
+ Out, Kind, NewMembers, Members, MemberOffsetRefs, Deterministic);
if (auto EC = MemberReferenceOffsetOrErr.getError())
return std::make_pair(ArcName, EC);
MemberReferenceOffset = MemberReferenceOffsetOrErr.get();
}
std::vector<unsigned> StringMapIndexes;
- writeStringTable(Out, NewMembers, StringMapIndexes);
+ if (Kind != object::Archive::K_BSD)
+ writeStringTable(Out, NewMembers, StringMapIndexes);
unsigned MemberNum = 0;
- unsigned LongNameMemberNum = 0;
unsigned NewMemberNum = 0;
+ std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin();
std::vector<unsigned> MemberOffset;
- for (std::vector<NewArchiveIterator>::iterator I = NewMembers.begin(),
- E = NewMembers.end();
- I != E; ++I, ++MemberNum) {
+ for (const NewArchiveIterator &I : NewMembers) {
+ MemoryBufferRef File = Members[MemberNum++];
unsigned Pos = Out.tell();
MemberOffset.push_back(Pos);
- MemoryBufferRef File = Members[MemberNum];
- if (I->isNewMember()) {
- StringRef FileName = I->getNew();
+ sys::TimeValue ModTime;
+ unsigned UID;
+ unsigned GID;
+ unsigned Perms;
+ if (Deterministic) {
+ ModTime.fromEpochTime(0);
+ UID = 0;
+ GID = 0;
+ Perms = 0644;
+ } else if (I.isNewMember()) {
const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum];
- NewMemberNum++;
-
- StringRef Name = sys::path::filename(FileName);
- if (Name.size() < 16)
- printMemberHeader(Out, Name, Status.getLastModificationTime(),
- Status.getUser(), Status.getGroup(),
- Status.permissions(), Status.getSize());
- else
- printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++],
- Status.getLastModificationTime(), Status.getUser(),
- Status.getGroup(), Status.permissions(),
- Status.getSize());
+ ModTime = Status.getLastModificationTime();
+ UID = Status.getUser();
+ GID = Status.getGroup();
+ Perms = Status.permissions();
} else {
- object::Archive::child_iterator OldMember = I->getOld();
- StringRef Name = I->getName();
+ object::Archive::child_iterator OldMember = I.getOld();
+ ModTime = OldMember->getLastModified();
+ UID = OldMember->getUID();
+ GID = OldMember->getGID();
+ Perms = OldMember->getAccessMode();
+ }
- if (Name.size() < 16)
- printMemberHeader(Out, Name, OldMember->getLastModified(),
- OldMember->getUID(), OldMember->getGID(),
- OldMember->getAccessMode(), OldMember->getSize());
- else
- printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++],
- OldMember->getLastModified(), OldMember->getUID(),
- OldMember->getGID(), OldMember->getAccessMode(),
- OldMember->getSize());
+ if (I.isNewMember()) {
+ StringRef FileName = I.getNew();
+ const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum++];
+ printMemberHeader(Out, Kind, sys::path::filename(FileName),
+ StringMapIndexIter, ModTime, UID, GID, Perms,
+ Status.getSize());
+ } else {
+ object::Archive::child_iterator OldMember = I.getOld();
+ printMemberHeader(Out, Kind, I.getName(), StringMapIndexIter, ModTime,
+ UID, GID, Perms, OldMember->getSize());
}
Out << File.getBuffer();
@@ -327,8 +385,11 @@ llvm::writeArchive(StringRef ArcName,
if (MemberReferenceOffset) {
Out.seek(MemberReferenceOffset);
- for (unsigned MemberNum : MemberOffsetRefs)
- print32BE(Out, MemberOffset[MemberNum]);
+ for (unsigned MemberNum : MemberOffsetRefs) {
+ if (Kind == object::Archive::K_BSD)
+ Out.seek(Out.tell() + 4); // skip over the string offset
+ print32(Out, Kind, MemberOffset[MemberNum]);
+ }
}
Output.keep();
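A minimal caller-side sketch of the extended writeArchive() entry point shown in this file's hunks, assuming an already-populated NewMembers vector; only the signature comes from the patch, while the header path and error-reporting style are assumptions.

// Sketch: emit a BSD-format archive with deterministic member metadata
// (mtime/UID/GID zeroed, permissions forced to 0644, per the Deterministic
// branch above).
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ArchiveWriter.h"   // assumed header for writeArchive
#include "llvm/Support/raw_ostream.h"
#include <vector>

using namespace llvm;

static bool emitDeterministicArchive(StringRef Path,
                                     std::vector<NewArchiveIterator> &Members) {
  std::pair<StringRef, std::error_code> Result =
      writeArchive(Path, Members, /*WriteSymtab=*/true,
                   object::Archive::K_BSD, /*Deterministic=*/true);
  if (Result.second) {
    errs() << Result.first << ": " << Result.second.message() << "\n";
    return false;
  }
  return true;
}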
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 64bb0d5c636d..bcca9839b475 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -154,30 +154,24 @@ ErrorOr<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const {
return Result;
}
-uint64_t COFFObjectFile::getSymbolValue(DataRefImpl Ref) const {
- COFFSymbolRef Sym = getCOFFSymbol(Ref);
-
- if (Sym.isAnyUndefined() || Sym.isCommon())
- return UnknownAddress;
-
- return Sym.getValue();
+uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const {
+ return getCOFFSymbol(Ref).getValue();
}
-std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
- uint64_t &Result) const {
- Result = getSymbolValue(Ref);
+ErrorOr<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const {
+ uint64_t Result = getSymbolValue(Ref);
COFFSymbolRef Symb = getCOFFSymbol(Ref);
int32_t SectionNumber = Symb.getSectionNumber();
if (Symb.isAnyUndefined() || Symb.isCommon() ||
COFF::isReservedSectionNumber(SectionNumber))
- return std::error_code();
+ return Result;
const coff_section *Section = nullptr;
if (std::error_code EC = getSection(SectionNumber, Section))
return EC;
Result += Section->VirtualAddress;
- return std::error_code();
+ return Result;
}
SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const {
@@ -362,6 +356,8 @@ getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) {
relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
const coff_relocation *begin = getFirstReloc(Sec, Data, base());
+ if (begin && Sec->VirtualAddress != 0)
+ report_fatal_error("Sections with relocations should have an address of 0");
DataRefImpl Ret;
Ret.p = reinterpret_cast<uintptr_t>(begin);
return relocation_iterator(RelocationRef(Ret, this));
@@ -919,19 +915,15 @@ uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const {
// whether or not we have an executable image.
//
// For object files, SizeOfRawData contains the size of section's data;
- // VirtualSize is always zero.
+ // VirtualSize should be zero but isn't due to buggy COFF writers.
//
// For executables, SizeOfRawData *must* be a multiple of FileAlignment; the
// actual section size is in VirtualSize. It is possible for VirtualSize to
// be greater than SizeOfRawData; the contents past that point should be
// considered to be zero.
- uint32_t SectionSize;
- if (Sec->VirtualSize)
- SectionSize = std::min(Sec->VirtualSize, Sec->SizeOfRawData);
- else
- SectionSize = Sec->SizeOfRawData;
-
- return SectionSize;
+ if (getDOSHeader())
+ return std::min(Sec->VirtualSize, Sec->SizeOfRawData);
+ return Sec->SizeOfRawData;
}
std::error_code
@@ -961,10 +953,6 @@ void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
reinterpret_cast<const coff_relocation*>(Rel.p) + 1);
}
-ErrorOr<uint64_t> COFFObjectFile::getRelocationAddress(DataRefImpl Rel) const {
- report_fatal_error("getRelocationAddress not implemented in COFFObjectFile");
-}
-
uint64_t COFFObjectFile::getRelocationOffset(DataRefImpl Rel) const {
const coff_relocation *R = toRel(Rel);
return R->VirtualAddress;
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index ecdd468305be..72c232c32870 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -627,6 +627,11 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
IO.mapOptional("Size", Section.Size, Hex64(Section.Content.binary_size()));
}
+static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Size", Section.Size, Hex64(0));
+}
+
static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Relocations", Section.Relocations);
@@ -682,6 +687,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::Group());
groupSectionMapping(IO, *cast<ELFYAML::Group>(Section.get()));
break;
+ case ELF::SHT_NOBITS:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::NoBitsSection());
+ sectionMapping(IO, *cast<ELFYAML::NoBitsSection>(Section.get()));
+ break;
case ELF::SHT_MIPS_ABIFLAGS:
if (!IO.outputting())
Section.reset(new ELFYAML::MipsABIFlags());
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 4255ed717fb9..05900630c75c 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -368,18 +368,12 @@ std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb,
return std::error_code();
}
-uint64_t MachOObjectFile::getSymbolValue(DataRefImpl Sym) const {
- uint64_t NValue = getNValue(Sym);
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, Sym);
- if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0)
- return UnknownAddress;
- return NValue;
+uint64_t MachOObjectFile::getSymbolValueImpl(DataRefImpl Sym) const {
+ return getNValue(Sym);
}
-std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Sym,
- uint64_t &Res) const {
- Res = getSymbolValue(Sym);
- return std::error_code();
+ErrorOr<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const {
+ return getSymbolValue(Sym);
}
uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const {
@@ -392,9 +386,7 @@ uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const {
}
uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const {
- uint64_t Value;
- getSymbolAddress(DRI, Value);
- return Value;
+ return getNValue(DRI);
}
SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const {
@@ -422,9 +414,6 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
uint32_t Result = SymbolRef::SF_None;
- if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF)
- Result |= SymbolRef::SF_Undefined;
-
if ((MachOType & MachO::N_TYPE) == MachO::N_INDR)
Result |= SymbolRef::SF_Indirect;
@@ -434,10 +423,10 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
if (MachOType & MachO::N_EXT) {
Result |= SymbolRef::SF_Global;
if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) {
- uint64_t Value;
- getSymbolAddress(DRI, Value);
- if (Value && Value != UnknownAddress)
+ if (getNValue(DRI))
Result |= SymbolRef::SF_Common;
+ else
+ Result |= SymbolRef::SF_Undefined;
}
if (!(MachOType & MachO::N_PEXT))
@@ -593,15 +582,6 @@ void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
++Rel.d.b;
}
-ErrorOr<uint64_t> MachOObjectFile::getRelocationAddress(DataRefImpl Rel) const {
- uint64_t Offset = getRelocationOffset(Rel);
-
- DataRefImpl Sec;
- Sec.d.a = Rel.d.a;
- uint64_t SecAddress = getSectionAddress(Sec);
- return SecAddress + Offset;
-}
-
uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const {
assert(getHeader().filetype == MachO::MH_OBJECT &&
"Only implemented for MH_OBJECT");
@@ -932,6 +912,13 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
return std::error_code();
}
+section_iterator
+MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const {
+ DataRefImpl Sec;
+ Sec.d.a = Rel->getRawDataRefImpl().d.a;
+ return section_iterator(SectionRef(Sec, this));
+}
+
basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const {
return getSymbolByIndex(0);
}
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 945252b21046..5c4b7a67b2ad 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -180,10 +180,10 @@ const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
}
uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
- uint64_t ret;
- if (std::error_code ec = (*unwrap(SI))->getAddress(ret))
- report_fatal_error(ec.message());
- return ret;
+ ErrorOr<uint64_t> Ret = (*unwrap(SI))->getAddress();
+ if (std::error_code EC = Ret.getError())
+ report_fatal_error(EC.message());
+ return *Ret;
}
uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
@@ -191,13 +191,6 @@ uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
}
// RelocationRef accessors
-uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
- ErrorOr<uint64_t> Ret = (*unwrap(RI))->getAddress();
- if (std::error_code EC = Ret.getError())
- report_fatal_error(EC.message());
- return *Ret;
-}
-
uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
return (*unwrap(RI))->getOffset();
}
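The C-API hunk above already shows the new caller pattern: symbol addresses now come back as ErrorOr<uint64_t> instead of filling an out-parameter. A small C++-side sketch of the same pattern follows; the iteration and formatting helpers are ordinary llvm::object usage and are not part of this patch.

// Sketch: walk an object file's symbols and print their addresses, handling
// the error case that the ErrorOr<uint64_t> return can now carry.
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void dumpSymbolAddresses(const ObjectFile &Obj) {
  for (const SymbolRef &Sym : Obj.symbols()) {
    ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
    if (std::error_code EC = AddrOrErr.getError()) {
      errs() << "error reading symbol address: " << EC.message() << "\n";
      continue;
    }
    outs() << format_hex(*AddrOrErr, 18) << "\n";
  }
}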
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 04e4916f94ef..f82edae89bc6 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -35,6 +35,15 @@ bool SectionRef::containsSymbol(SymbolRef S) const {
return *this == *SymSec;
}
+uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const {
+ uint32_t Flags = getSymbolFlags(Ref);
+ if (Flags & SymbolRef::SF_Undefined)
+ return 0;
+ if (Flags & SymbolRef::SF_Common)
+ return getCommonSymbolSize(Ref);
+ return getSymbolValueImpl(Ref);
+}
+
std::error_code ObjectFile::printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const {
ErrorOr<StringRef> Name = getSymbolName(Symb);
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 4b0a0e5d4819..5d31225396d4 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -52,14 +52,17 @@ namespace llvm {
/* Number of bits in the significand. This includes the integer
bit. */
unsigned int precision;
+
+ /* Number of bits actually used in the semantics. */
+ unsigned int sizeInBits;
};
- const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
- const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
- const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
- const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
- const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
- const fltSemantics APFloat::Bogus = { 0, 0, 0 };
+ const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 };
+ const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 };
+ const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 };
+ const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 };
+ const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 };
+ const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 };
/* The PowerPC format consists of two doubles. It does not map cleanly
onto the usual format above. It is approximated using twice the
@@ -72,7 +75,7 @@ namespace llvm {
to represent all possible values held by a PPC double-double number,
for example: (long double) 1.0 + (long double) 0x1p-106
Should this be replaced by a full emulation of PPC double-double? */
- const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
+ const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 };
/* A tight upper bound on number of parts required to hold the value
pow(5, power) is
@@ -2416,7 +2419,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
roundingMode rounding_mode)
{
unsigned int parts, pow5PartCount;
- fltSemantics calcSemantics = { 32767, -32767, 0 };
+ fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
integerPart pow5Parts[maxPowerOfFiveParts];
bool isNearest;
@@ -3368,6 +3371,10 @@ APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
}
}
+unsigned APFloat::getSizeInBits(const fltSemantics &Sem) {
+ return Sem.sizeInBits;
+}
+
/// Make this number the largest magnitude normal number in the given
/// semantics.
void APFloat::makeLargest(bool Negative) {
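The new fourth column above records the storage width of each format, and the added static accessor simply reports it. A tiny sanity sketch, with the expected values taken from the tables in this hunk:

// Sketch: APFloat::getSizeInBits() reflects the sizeInBits field added to
// each fltSemantics entry above.
#include "llvm/ADT/APFloat.h"
#include <cassert>

static void checkSemanticsWidths() {
  using llvm::APFloat;
  assert(APFloat::getSizeInBits(APFloat::IEEEsingle) == 32);
  assert(APFloat::getSizeInBits(APFloat::IEEEdouble) == 64);
  assert(APFloat::getSizeInBits(APFloat::x87DoubleExtended) == 80);
  assert(APFloat::getSizeInBits(APFloat::PPCDoubleDouble) == 128);
}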
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index dcaacf6248d1..17fba95ebb2b 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -46,21 +46,21 @@ using namespace cl;
//
namespace llvm {
namespace cl {
-TEMPLATE_INSTANTIATION(class basic_parser<bool>);
-TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
-TEMPLATE_INSTANTIATION(class basic_parser<int>);
-TEMPLATE_INSTANTIATION(class basic_parser<unsigned>);
-TEMPLATE_INSTANTIATION(class basic_parser<unsigned long long>);
-TEMPLATE_INSTANTIATION(class basic_parser<double>);
-TEMPLATE_INSTANTIATION(class basic_parser<float>);
-TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
-TEMPLATE_INSTANTIATION(class basic_parser<char>);
-
-TEMPLATE_INSTANTIATION(class opt<unsigned>);
-TEMPLATE_INSTANTIATION(class opt<int>);
-TEMPLATE_INSTANTIATION(class opt<std::string>);
-TEMPLATE_INSTANTIATION(class opt<char>);
-TEMPLATE_INSTANTIATION(class opt<bool>);
+template class basic_parser<bool>;
+template class basic_parser<boolOrDefault>;
+template class basic_parser<int>;
+template class basic_parser<unsigned>;
+template class basic_parser<unsigned long long>;
+template class basic_parser<double>;
+template class basic_parser<float>;
+template class basic_parser<std::string>;
+template class basic_parser<char>;
+
+template class opt<unsigned>;
+template class opt<int>;
+template class opt<std::string>;
+template class opt<char>;
+template class opt<bool>;
}
} // end namespace llvm::cl
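The hunk above is mechanical: the added lines use the plain C++11 spelling of an explicit instantiation definition, which is what the removed TEMPLATE_INSTANTIATION wrapper amounted to. For illustration only, with a made-up template name rather than the real cl::basic_parser:

// Sketch of what an explicit instantiation definition does: the class is
// instantiated and emitted in this translation unit rather than at each use.
template <typename T> class example_parser {
public:
  T parse(const char *) { return T(); }
};

// Analogue of the 'template class basic_parser<bool>;' lines above, using the
// hypothetical example_parser instead.
template class example_parser<bool>;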
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 92be0e047f62..c6646fb101b7 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -1165,6 +1165,122 @@ Triple Triple::get64BitArchVariant() const {
return T;
}
+Triple Triple::getBigEndianArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::amdgcn:
+ case Triple::amdil64:
+ case Triple::amdil:
+ case Triple::hexagon:
+ case Triple::hsail64:
+ case Triple::hsail:
+ case Triple::kalimba:
+ case Triple::le32:
+ case Triple::le64:
+ case Triple::msp430:
+ case Triple::nvptx64:
+ case Triple::nvptx:
+ case Triple::r600:
+ case Triple::shave:
+ case Triple::spir64:
+ case Triple::spir:
+ case Triple::wasm32:
+ case Triple::wasm64:
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::xcore:
+
+ // ARM is intentionally unsupported here, changing the architecture would
+ // drop any arch suffixes.
+ case Triple::arm:
+ case Triple::thumb:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::aarch64_be:
+ case Triple::armeb:
+ case Triple::bpfeb:
+ case Triple::mips64:
+ case Triple::mips:
+ case Triple::ppc64:
+ case Triple::ppc:
+ case Triple::sparc:
+ case Triple::sparcv9:
+ case Triple::systemz:
+ case Triple::tce:
+ case Triple::thumbeb:
+ // Already big endian.
+ break;
+
+ case Triple::aarch64: T.setArch(Triple::aarch64_be); break;
+ case Triple::bpfel: T.setArch(Triple::bpfeb); break;
+ case Triple::mips64el:T.setArch(Triple::mips64); break;
+ case Triple::mipsel: T.setArch(Triple::mips); break;
+ case Triple::ppc64le: T.setArch(Triple::ppc64); break;
+ case Triple::sparcel: T.setArch(Triple::sparc); break;
+ }
+ return T;
+}
+
+Triple Triple::getLittleEndianArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::ppc:
+ case Triple::sparcv9:
+ case Triple::systemz:
+ case Triple::tce:
+
+ // ARM is intentionally unsupported here, changing the architecture would
+ // drop any arch suffixes.
+ case Triple::armeb:
+ case Triple::thumbeb:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::aarch64:
+ case Triple::amdgcn:
+ case Triple::amdil64:
+ case Triple::amdil:
+ case Triple::arm:
+ case Triple::bpfel:
+ case Triple::hexagon:
+ case Triple::hsail64:
+ case Triple::hsail:
+ case Triple::kalimba:
+ case Triple::le32:
+ case Triple::le64:
+ case Triple::mips64el:
+ case Triple::mipsel:
+ case Triple::msp430:
+ case Triple::nvptx64:
+ case Triple::nvptx:
+ case Triple::ppc64le:
+ case Triple::r600:
+ case Triple::shave:
+ case Triple::sparcel:
+ case Triple::spir64:
+ case Triple::spir:
+ case Triple::thumb:
+ case Triple::wasm32:
+ case Triple::wasm64:
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::xcore:
+ // Already little endian.
+ break;
+
+ case Triple::aarch64_be: T.setArch(Triple::aarch64); break;
+ case Triple::bpfeb: T.setArch(Triple::bpfel); break;
+ case Triple::mips64: T.setArch(Triple::mips64el); break;
+ case Triple::mips: T.setArch(Triple::mipsel); break;
+ case Triple::ppc64: T.setArch(Triple::ppc64le); break;
+ case Triple::sparc: T.setArch(Triple::sparcel); break;
+ }
+ return T;
+}
+
const char *Triple::getARMCPUForArch(StringRef MArch) const {
if (MArch.empty())
MArch = getArchName();
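A short sketch of the two Triple helpers added above; the expected results follow directly from the switch cases in this hunk, while the triple strings themselves are only illustrative.

// Sketch: swapping a triple between endiannesses only rewrites the
// architecture component; arches with no counterpart become UnknownArch.
#include "llvm/ADT/Triple.h"
#include <cassert>

static void swapEndianness() {
  using llvm::Triple;

  Triple LE("mips64el-unknown-linux-gnu");
  assert(LE.getBigEndianArchVariant().getArch() == Triple::mips64);

  Triple BE("powerpc64-unknown-linux-gnu");
  assert(BE.getLittleEndianArchVariant().getArch() == Triple::ppc64le);

  // Plain ARM is intentionally rejected (see the comment in the switch above),
  // because changing the arch name would drop any architecture suffix.
  Triple Arm("arm-none-eabi");
  assert(Arm.getBigEndianArchVariant().getArch() == Triple::UnknownArch);
}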
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 6e982bf1da19..c9a31b64cfd3 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -1648,7 +1648,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
}
OS << " {";
- const std::vector<Record*> &SC = R.getSuperClasses();
+ ArrayRef<Record *> SC = R.getSuperClasses();
if (!SC.empty()) {
OS << "\t//";
for (const Record *Super : SC)
diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp
index 92f5b2dd7172..07c538159dcb 100644
--- a/lib/TableGen/SetTheory.cpp
+++ b/lib/TableGen/SetTheory.cpp
@@ -302,7 +302,7 @@ const RecVec *SetTheory::expand(Record *Set) {
return &I->second;
// This is the first time we see Set. Find a suitable expander.
- const std::vector<Record*> &SC = Set->getSuperClasses();
+ ArrayRef<Record *> SC = Set->getSuperClasses();
for (unsigned i = 0, e = SC.size(); i != e; ++i) {
// Skip unnamed superclasses.
if (!dyn_cast<StringInit>(SC[i]->getNameInit()))
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 15df25aea50e..5c36fda2e1ca 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -184,7 +184,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
// Since everything went well, we can now set the "superclass" list for the
// current record.
- const std::vector<Record*> &SCs = SC->getSuperClasses();
+ ArrayRef<Record *> SCs = SC->getSuperClasses();
ArrayRef<SMRange> SCRanges = SC->getSuperClassRanges();
for (unsigned i = 0, e = SCs.size(); i != e; ++i) {
if (CurRec->isSubClassOf(SCs[i]))
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index bffd9e6e8c76..79a84ad8c6c5 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -510,9 +510,17 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
if (J.isRegMask())
AvailableRegs.clearBitsNotInMask(J.getRegMask());
- if (J.isReg() && J.isDef() && AvailableRegs[J.getReg()]) {
- assert(J.isDead() && "Non-dead def should have been removed by now!");
- AvailableRegs.reset(J.getReg());
+ if (J.isReg() && J.isDef()) {
+ MCRegAliasIterator AI(J.getReg(), TRI, /*IncludeSelf=*/true);
+ if (J.isDead())
+ for (; AI.isValid(); ++AI)
+ AvailableRegs.reset(*AI);
+#ifndef NDEBUG
+ else
+ for (; AI.isValid(); ++AI)
+ assert(!AvailableRegs[*AI] &&
+ "Non-dead def should have been removed by now!");
+#endif
}
}
}
@@ -585,7 +593,6 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
if (Change) {
Substs[MO.getReg()] = Reg;
MO.setReg(Reg);
- MRI->setPhysRegUsed(Reg);
Changed = true;
}
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 4691e949838d..815ebef177d8 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -40,6 +40,11 @@ def CC_AArch64_AAPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ // The 'nest' parameter, if any, is passed in X18.
+ // Darwin uses X18 as the platform register and hence 'nest' isn't currently
+ // supported there.
+ CCIfNest<CCAssignToReg<[X18]>>,
+
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index c19fcdc4bb18..072819836bb3 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -310,7 +310,7 @@ CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
}
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
- assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
+ assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
"Alloca should always return a pointer.");
// Don't handle dynamic allocas.
@@ -420,7 +420,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
- EVT DestEVT = TLI.getValueType(GV->getType(), true);
+ EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
if (!DestEVT.isSimple())
return 0;
@@ -459,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
}
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
@@ -538,13 +538,14 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
}
@@ -879,13 +880,13 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
case Instruction::IntToPtr:
// Look past no-op inttoptrs if its operand is in the same BB.
if (InMBB &&
- TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if its operand is in the same BB.
- if (InMBB &&
- TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
}
@@ -906,7 +907,7 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
- EVT evt = TLI.getValueType(Ty, true);
+ EVT evt = TLI.getValueType(DL, Ty, true);
// Only handle simple types.
if (evt == MVT::Other || !evt.isSimple())
@@ -1390,7 +1391,7 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
Type *Ty = LHS->getType();
- EVT EVT = TLI.getValueType(Ty, true);
+ EVT EVT = TLI.getValueType(DL, Ty, true);
if (!EVT.isSimple())
return false;
MVT VT = EVT.getSimpleVT();
@@ -2761,7 +2762,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
if (SrcReg == 0)
return false;
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
if (SrcVT == MVT::f128)
return false;
@@ -2797,7 +2798,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
return false;
bool SrcIsKill = hasTrivialKill(I->getOperand(0));
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
@@ -2856,7 +2857,7 @@ bool AArch64FastISel::fastLowerArguments() {
if (ArgTy->isStructTy() || ArgTy->isArrayTy())
return false;
- EVT ArgVT = TLI.getValueType(ArgTy);
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
if (!ArgVT.isSimple())
return false;
@@ -2898,7 +2899,7 @@ bool AArch64FastISel::fastLowerArguments() {
unsigned GPRIdx = 0;
unsigned FPRIdx = 0;
for (auto const &Arg : F->args()) {
- MVT VT = TLI.getSimpleValueType(Arg.getType());
+ MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
unsigned SrcReg;
const TargetRegisterClass *RC;
if (VT >= MVT::i1 && VT <= MVT::i32) {
@@ -3689,7 +3690,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -3724,7 +3725,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
return false;
- EVT RVEVT = TLI.getValueType(RV->getType());
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple())
return false;
@@ -3772,8 +3773,8 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) {
Value *Op = I->getOperand(0);
Type *SrcTy = Op->getType();
- EVT SrcEVT = TLI.getValueType(SrcTy, true);
- EVT DestEVT = TLI.getValueType(DestTy, true);
+ EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ EVT DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
@@ -4459,7 +4460,7 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
}
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
- EVT DestEVT = TLI.getValueType(I->getType(), true);
+ EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
if (!DestEVT.isSimple())
return false;
@@ -4825,7 +4826,7 @@ std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
bool IdxNIsKill = hasTrivialKill(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend it.
- MVT PtrVT = TLI.getPointerTy();
+ MVT PtrVT = TLI.getPointerTy(DL);
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT)) {
IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
@@ -4849,7 +4850,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
// into a single N = N + TotalOffset.
uint64_t TotalOffs = 0;
Type *Ty = I->getOperand(0)->getType();
- MVT VT = TLI.getPointerTy();
+ MVT VT = TLI.getPointerTy(DL);
for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast<StructType>(Ty)) {
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 3ba7e70a102d..a7817f4f67dd 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,12 +349,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Allocate space for the rest of the frame.
const unsigned Alignment = MFI->getMaxAlignment();
- const bool NeedsRealignment = (Alignment > 16);
+ const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
unsigned scratchSPReg = AArch64::SP;
- if (NeedsRealignment) {
- // Use the first callee-saved register as a scratch register
- assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
- "No scratch register to align SP!");
+ if (NumBytes && NeedsRealignment) {
+ // Use the first callee-saved register as a scratch register.
scratchSPReg = AArch64::X9;
}
@@ -366,9 +364,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
- assert(!(NeedsRealignment && NumBytes==0) &&
- "NumBytes should never be 0 when realignment is needed");
-
if (NumBytes && NeedsRealignment) {
const unsigned NrBitsToZero = countTrailingZeros(Alignment);
assert(NrBitsToZero > 1);
@@ -881,28 +876,34 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
return true;
}
-void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
- MachineFunction &MF, RegScavenger *RS) const {
+void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ // All calls are tail calls in GHC calling conv, and functions have no
+ // prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- MachineRegisterInfo *MRI = &MF.getRegInfo();
SmallVector<unsigned, 4> UnspilledCSGPRs;
SmallVector<unsigned, 4> UnspilledCSFPRs;
// The frame record needs to be created by saving the appropriate registers
if (hasFP(MF)) {
- MRI->setPhysRegUsed(AArch64::FP);
- MRI->setPhysRegUsed(AArch64::LR);
+ SavedRegs.set(AArch64::FP);
+ SavedRegs.set(AArch64::LR);
}
// Spill the BasePtr if it's used. Do this first thing so that the
// getCalleeSavedRegs() below will get the right answer.
if (RegInfo->hasBasePointer(MF))
- MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+ SavedRegs.set(RegInfo->getBaseRegister());
if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
- MRI->setPhysRegUsed(AArch64::X9);
+ SavedRegs.set(AArch64::X9);
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
@@ -924,8 +925,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
AArch64::FPR64RegClass.contains(EvenReg)) &&
"Register class mismatch!");
- const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
- const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
+ const bool OddRegUsed = SavedRegs.test(OddReg);
+ const bool EvenRegUsed = SavedRegs.test(EvenReg);
// Early exit if none of the registers in the register pair is actually
// used.
@@ -946,7 +947,7 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
if (OddRegUsed ^ EvenRegUsed) {
// Find out which register is the additional spill.
Reg = OddRegUsed ? EvenReg : OddReg;
- MRI->setPhysRegUsed(Reg);
+ SavedRegs.set(Reg);
}
DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
@@ -1001,7 +1002,7 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
UnspilledCSGPRs.pop_back();
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
<< " to get a scratch register.\n");
- MRI->setPhysRegUsed(Reg);
+ SavedRegs.set(Reg);
ExtraCSSpill = true;
++Count;
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index b496fccba349..731f031ff855 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -59,8 +59,8 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
};
} // End llvm namespace
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 1ea4abcf05fa..772e894f4f0a 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -610,10 +610,11 @@ static bool isWorthFoldingADDlow(SDValue N) {
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
SDValue &Base, SDValue &OffImm) {
SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
const TargetLowering *TLI = getTargetLowering();
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
return true;
}
@@ -628,10 +629,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
const GlobalValue *GV = GAN->getGlobal();
unsigned Alignment = GV->getAlignment();
- const DataLayout *DL = TLI->getDataLayout();
Type *Ty = GV->getType()->getElementType();
if (Alignment == 0 && Ty->isSized())
- Alignment = DL->getABITypeAlignment(Ty);
+ Alignment = DL.getABITypeAlignment(Ty);
if (Alignment >= Size)
return true;
@@ -645,7 +645,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
return true;
@@ -688,7 +688,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
const TargetLowering *TLI = getTargetLowering();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
return true;
@@ -1494,7 +1495,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
}
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB,
+ unsigned &Immr, unsigned &Imms,
bool BiggerPattern) {
assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
"N must be a SHR/SRA operation to call this function");
@@ -1508,7 +1509,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
"Type checking must have been done before calling this function");
// Check for AND + SRL doing several bits extract.
- if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
+ if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
return true;
// we're looking for a shift of a shift
@@ -1548,13 +1549,9 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
"bad amount in shift node!");
- // Note: The width operand is encoded as width-1.
- unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
- int sLSB = Srl_imm - Shl_imm;
- if (sLSB < 0)
- return false;
- LSB = sLSB;
- MSB = LSB + Width;
+ int immr = Srl_imm - Shl_imm;
+ Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
+ Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1;
// SRA requires a signed extraction
if (VT == MVT::i32)
Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
@@ -1564,7 +1561,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
}
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
- SDValue &Opd0, unsigned &LSB, unsigned &MSB,
+ SDValue &Opd0, unsigned &Immr, unsigned &Imms,
unsigned NumberOfIgnoredLowBits = 0,
bool BiggerPattern = false) {
if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
@@ -1576,11 +1573,11 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
return false;
break;
case ISD::AND:
- return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
+ return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
NumberOfIgnoredLowBits, BiggerPattern);
case ISD::SRL:
case ISD::SRA:
- return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
+ return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
}
unsigned NOpc = N->getMachineOpcode();
@@ -1593,8 +1590,8 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
case AArch64::UBFMXri:
Opc = NOpc;
Opd0 = N->getOperand(0);
- LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+ Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+ Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
return true;
}
// Unreachable
@@ -1602,9 +1599,9 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
}
SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
- unsigned Opc, LSB, MSB;
+ unsigned Opc, Immr, Imms;
SDValue Opd0;
- if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
+ if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
return nullptr;
EVT VT = N->getValueType(0);
@@ -1613,8 +1610,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
// If the bit extract operation is 64bit but the original type is 32bit, we
// need to add one EXTRACT_SUBREG.
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
- SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, MVT::i64),
- CurDAG->getTargetConstant(MSB, dl, MVT::i64)};
+ SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
+ CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
@@ -1624,8 +1621,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
return Node;
}
- SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, VT),
- CurDAG->getTargetConstant(MSB, dl, VT)};
+ SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
+ CurDAG->getTargetConstant(Imms, dl, VT)};
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
@@ -2351,7 +2348,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
const TargetLowering *TLI = getTargetLowering();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ SDValue TFI = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
SDLoc DL(Node);
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index f3242cdd971d..3e8f46cf1ecd 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -705,7 +705,8 @@ void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v4i32);
}
-EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
@@ -774,7 +775,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
}
}
-MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
+MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
+ EVT) const {
return MVT::i64;
}
@@ -1710,7 +1712,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
const char *LibcallName =
(ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+ SDValue Callee =
+ DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2089,7 +2092,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
+ /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
@@ -2111,7 +2115,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
- EVT PtrTy = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
@@ -2119,7 +2123,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// case. It should also work for fundamental types too.
unsigned FrameIdx =
MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
- SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
@@ -2186,7 +2190,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue ArgValue;
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
@@ -2265,6 +2269,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SmallVector<SDValue, 8> MemOps;
@@ -2279,7 +2284,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
if (GPRSaveSize != 0) {
GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
- SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
@@ -2288,8 +2293,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 8), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, DL, getPointerTy()));
+ FIN =
+ DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
@@ -2307,7 +2312,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
if (FPRSaveSize != 0) {
FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
- SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
@@ -2317,8 +2322,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(i * 16), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, DL, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
+ DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
@@ -2614,7 +2619,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Outs[i].VT;
// Get type of the original argument.
- EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
+ EVT ActualVT = getValueType(DAG.getDataLayout(),
+ CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
@@ -2674,10 +2680,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
true),
DL);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
+ getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
@@ -2743,13 +2751,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
- PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
- DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(FI);
// Make sure any stack arguments overlapping with where we're storing
@@ -2759,7 +2767,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
- DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(LocMemOffset);
}
@@ -2809,25 +2817,24 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const GlobalValue *GV = G->getGlobal();
bool InternalLinkage = GV->hasInternalLinkage();
if (InternalLinkage)
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
else {
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0,
- AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ Callee =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
}
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- Callee =
- DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
// We don't usually want to end the call-sequence here because we would tidy
@@ -2977,7 +2984,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
@@ -3069,7 +3076,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
SDLoc DL(Op);
- MVT PtrVT = getPointerTy();
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
@@ -3124,7 +3131,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -3159,7 +3166,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
}
SDValue TPOff;
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
@@ -3786,7 +3793,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
// Jump table entries as PC relative offsets. No additional tweaking
// is necessary here. Just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
@@ -3812,7 +3819,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
@@ -3853,7 +3860,7 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
@@ -3879,8 +3886,8 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
- SDValue FR =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
+ getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
@@ -3892,6 +3899,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
// Standard, section B.3.
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
@@ -3900,8 +3908,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
- SDValue Stack =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), false, false, 8));
@@ -3910,12 +3917,12 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
- GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(8, DL, getPointerTy()));
+ GRTopAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
- GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
- GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
- DAG.getConstant(GPRSize, DL, getPointerTy()));
+ GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
+ GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
+ DAG.getConstant(GPRSize, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, 8), false, false, 8));
@@ -3925,28 +3932,28 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
- VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(16, DL, getPointerTy()));
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(16, DL, PtrVT));
- VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
- VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
- DAG.getConstant(FPRSize, DL, getPointerTy()));
+ VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
+ VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
+ DAG.getConstant(FPRSize, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, 16), false, false, 8));
}
// int __gr_offs at offset 24
- SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(24, DL, getPointerTy()));
+ SDValue GROffsAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL,
DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, 24), false,
false, 4));
// int __vr_offs at offset 28
- SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(28, DL, getPointerTy()));
+ SDValue VROffsAddr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
MemOps.push_back(DAG.getStore(Chain, DL,
DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, 28), false,
@@ -3987,21 +3994,22 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
unsigned Align = Op.getConstantOperandVal(3);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr,
- MachinePointerInfo(V), false, false, false, 0);
+ SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V),
+ false, false, false, 0);
Chain = VAList.getValue(1);
if (Align > 8) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
- VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(Align - 1, DL, getPointerTy()));
- VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
- DAG.getConstant(-(int64_t)Align, DL, getPointerTy()));
+ VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Align - 1, DL, PtrVT));
+ VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
+ DAG.getConstant(-(int64_t)Align, DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
- uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+ uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
@@ -4016,8 +4024,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
}
// Increment the pointer, VAList, to the next vaarg
- SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(ArgSize, DL, getPointerTy()));
+ SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(ArgSize, DL, PtrVT));
// Store the incremented VAList to the legalized pointer
SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
false, false, 0);
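[Annotation, not part of the patch: the Align > 8 path above rounds the loaded va_list pointer up with an ADD/AND pair. A standalone sketch of that arithmetic, assuming a power-of-two alignment as the assert requires:]

    #include <cassert>
    #include <cstdint>

    // Mirrors the ISD::ADD / ISD::AND nodes emitted above:
    //   aligned = (p + align - 1) & -align
    uint64_t roundUpToAlign(uint64_t P, uint64_t Align) {
      assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
      return (P + Align - 1) & ~(Align - 1); // same bits as & -(int64_t)Align
    }
    // roundUpToAlign(0x1004, 16) == 0x1010; roundUpToAlign(0x1010, 16) == 0x1010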
@@ -4057,8 +4065,8 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
.Default(0);
@@ -4079,7 +4087,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, DL, getPointerTy());
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
@@ -4232,7 +4240,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
-AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
@@ -4283,8 +4291,7 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, const std::string &Constraint,
- MVT VT) const {
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
@@ -4320,10 +4327,9 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
- const std::string Reg =
- std::string(&Constraint[2], &Constraint[Size - 1]);
- int RegNo = atoi(Reg.c_str());
- if (RegNo >= 0 && RegNo <= 31) {
+ int RegNo;
+ bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
+ if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
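[Annotation, not part of the patch: replacing atoi() with StringRef::getAsInteger() also changes behaviour for malformed constraints -- atoi() silently returns 0 (register v0), while getAsInteger() reports the failure. A self-contained sketch of the new parse, using a hypothetical helper name:]

    #include <cctype>
    #include "llvm/ADT/StringRef.h"

    // Parse a register number out of an inline-asm constraint such as "{v17}".
    // Returns true and sets RegNo only for well-formed "{v0}" .. "{v31}".
    static bool parseVRegConstraint(llvm::StringRef Constraint, int &RegNo) {
      size_t Size = Constraint.size();
      if ((Size != 4 && Size != 5) || Constraint.front() != '{' ||
          Constraint.back() != '}' || tolower(Constraint[1]) != 'v')
        return false;
      // getAsInteger returns true on failure, e.g. for "{vXY}".
      bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
      return !Failed && RegNo >= 0 && RegNo <= 31;
    }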
@@ -6429,6 +6435,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
+ auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
@@ -6444,7 +6451,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
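[Annotation, not part of the patch: for concreteness, with an illustrative type such as @llvm.aarch64.neon.ld2.v4i32 the result is { <4 x i32>, <4 x i32> }, whose alloc size is 32 bytes, so NumElts = 32 / 8 = 4 and Info.memVT becomes v4i64 (256 bits); the intrinsic is modelled as touching the entire set of loaded vectors rather than a single element.]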
@@ -6470,7 +6477,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
@@ -6488,7 +6495,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
@@ -6501,7 +6508,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
@@ -6572,7 +6579,8 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
return true;
const TargetOptions &Options = getTargetMachine().Options;
- EVT VT = getValueType(User->getOperand(0)->getType());
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ EVT VT = getValueType(DL, User->getOperand(0)->getType());
if (isFMAFasterThanFMulAndFAdd(VT) &&
isOperationLegalOrCustom(ISD::FMA, VT) &&
@@ -6637,6 +6645,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
+ auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo());
Type *IdxTy = *GTI;
// This extension will end up with a shift because of the scaling factor.
@@ -6644,7 +6653,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
uint64_t ShiftAmt =
- countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
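[Annotation, not part of the patch: worked numbers for the ShiftAmt computation above, assuming the usual integer store sizes:]

    #include <cstdint>
    #include "llvm/Support/MathExtras.h"

    // log2(store size in bits) - log2(8) == log2(element size in bytes),
    // which is the shift folded into the scaled addressing mode.
    uint64_t scaleShift(uint64_t StoreSizeInBits) {
      return llvm::countTrailingZeros(StoreSizeInBits) - 3;
    }
    // scaleShift(32) == 2  -> i32 index, "lsl #2" (scale by 4 bytes)
    // scaleShift(64) == 3  -> i64 index, "lsl #3" (scale by 8 bytes)
    // Only results in the range 1..4 can be folded, per the check that follows.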
@@ -6708,10 +6717,10 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
- const DataLayout *DL = getDataLayout();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VecTy = Shuffles[0]->getType();
- unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+ unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
// Skip illegal vector types.
if (VecSize != 64 && VecSize != 128)
@@ -6721,8 +6730,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = VecTy->getVectorElementType();
if (EltTy->isPointerTy())
- VecTy = VectorType::get(DL->getIntPtrType(EltTy),
- VecTy->getVectorNumElements());
+ VecTy =
+ VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[2] = {VecTy, PtrTy};
@@ -6796,8 +6805,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
Type *EltTy = VecTy->getVectorElementType();
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
- const DataLayout *DL = getDataLayout();
- unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
// Skip illegal vector types.
if (SubVecSize != 64 && SubVecSize != 128)
@@ -6810,7 +6819,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
- Type *IntTy = DL->getIntPtrType(EltTy);
+ Type *IntTy = DL.getIntPtrType(EltTy);
unsigned NumOpElts =
dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
@@ -6894,8 +6903,8 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// AArch64 has five basic addressing modes:
// reg
@@ -6916,7 +6925,7 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
- uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
@@ -6946,8 +6955,8 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return false;
}
-int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty,
+int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
@@ -6956,7 +6965,7 @@ int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
- if (isLegalAddressingMode(AM, Ty, AS))
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 46298c0e7de1..c73ce1e54b3e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -233,7 +233,7 @@ public:
APInt &KnownOne, const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override;
+ MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
/// allowsMisalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
@@ -278,7 +278,8 @@ public:
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
/// getSetCCResultType - Return the ISD::SETCC ValueType
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
@@ -323,7 +324,7 @@ public:
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
/// \brief Return the cost of the scaling factor used in the addressing
@@ -331,7 +332,7 @@ public:
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
- int getScalingFactorCost(const AddrMode &AM, Type *Ty,
+ int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
@@ -471,9 +472,9 @@ private:
std::vector<SDNode *> *Created) const override;
bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -483,14 +484,12 @@ private:
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
// FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index b73e0958df90..fa1a46acba84 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -894,6 +894,8 @@ def REVXr : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
+def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
+
// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
@@ -5283,18 +5285,23 @@ def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
@@ -5309,12 +5316,16 @@ def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 1836682e386e..841af55f7a65 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -90,7 +90,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const AArch64FrameLowering *TFI = getFrameLowering(MF);
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -119,7 +119,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const AArch64FrameLowering *TFI = getFrameLowering(MF);
switch (Reg) {
default:
@@ -198,11 +198,9 @@ bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool
AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64FrameLowering *TFI = getFrameLowering(MF);
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget()
- .getSubtargetImpl(*MF.getFunction())
- ->getFrameLowering()
- ->getStackAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -213,8 +211,7 @@ AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-
+ const AArch64FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
}
@@ -280,7 +277,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const AArch64FrameLowering *TFI = getFrameLowering(MF);
MachineFrameInfo *MFI = MF.getFrameInfo();
// Estimate an offset from the frame pointer.
@@ -376,8 +373,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
- const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const AArch64FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned FrameReg;
@@ -415,7 +411,7 @@ namespace llvm {
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const AArch64FrameLowering *TFI = getFrameLowering(MF);
switch (RC->getID()) {
default:
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index b9c53998752a..f40293021d74 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -16,11 +16,6 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
-AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL)
- : TargetSelectionDAGInfo(DL) {}
-
-AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {}
-
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVolatile,
@@ -37,8 +32,8 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
const AArch64TargetLowering &TLI = *STI.getTargetLowering();
- EVT IntPtr = TLI.getPointerTy();
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 11932d2b1c22..97421b45b122 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -20,8 +20,6 @@ namespace llvm {
class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit AArch64SelectionDAGInfo(const DataLayout *DL);
- ~AArch64SelectionDAGInfo();
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
SDValue Dst, SDValue Src, SDValue Size,
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 554826b1e08a..486efd6ce3a2 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -49,15 +49,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false),
HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(FS)),
- TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
+ InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
+ TLInfo(TM, *this) {}
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned char
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const {
- bool isDecl = GV->isDeclarationForLinker();
+ bool isDef = GV->isStrongDefinitionForLinker();
// MachO large model always goes via a GOT, simply to get a single 8-byte
// absolute relocation on all global addresses.
@@ -66,8 +66,7 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
// The small code model's direct accesses use ADRP, which cannot necessarily
// produce the value 0 (if the code is above 4GB).
- if (TM.getCodeModel() == CodeModel::Small &&
- GV->isWeakForLinker() && isDecl) {
+ if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage()) {
// In PIC mode use the GOT, but in absolute mode use a constant pool load.
if (TM.getRelocationModel() == Reloc::Static)
return AArch64II::MO_CONSTPOOL;
@@ -85,8 +84,7 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
// defined could end up in unexpected places. Use a GOT.
if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) {
if (isTargetMachO())
- return (isDecl || GV->isWeakForLinker()) ? AArch64II::MO_GOT
- : AArch64II::MO_NO_FLAG;
+ return isDef ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT;
else
// No need to go through the GOT for local symbols on ELF.
return GV->hasLocalLinkage() ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index fc91c94351cc..e085cca35f1c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -181,8 +181,8 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
@@ -265,7 +265,7 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -289,7 +289,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost(
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -364,8 +364,8 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
};
- EVT SelCondTy = TLI->getValueType(CondTy);
- EVT SelValTy = TLI->getValueType(ValTy);
+ EVT SelCondTy = TLI->getValueType(DL, CondTy);
+ EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
int Idx =
ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
@@ -380,7 +380,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isIntegerTy(64)) {
@@ -416,7 +416,7 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
// ldN/stN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 4dabdadd8eeb..444d3ccc15e1 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -31,7 +31,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
typedef TargetTransformInfo TTI;
friend BaseT;
- const AArch64TargetMachine *TM;
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
@@ -50,30 +49,15 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
public:
explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F)
- : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)),
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
AArch64TTIImpl(const AArch64TTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST),
- TLI(Arg.TLI) {}
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
AArch64TTIImpl(AArch64TTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)),
- ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {}
- AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- TM = RHS.TM;
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- TM = std::move(RHS.TM);
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 359c2e734e21..db9fb0e775df 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -228,7 +228,7 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
static MCSymbolizer *
-createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
+createAArch64ExternalSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp,
void *DisInfo, MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &&RelInfo) {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index b5b1d1f9e19c..16d53569b231 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -199,7 +199,7 @@ MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
MCTargetStreamer *
createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
- if (TT.getObjectFormat() == Triple::ELF)
+ if (TT.isOSBinFormatELF())
return new AArch64TargetELFStreamer(S);
return nullptr;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 099d1b01c339..9f7bed0d3b12 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -42,16 +42,13 @@ static MCInstrInfo *createAArch64MCInstrInfo() {
static MCSubtargetInfo *
createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
-
if (CPU.empty())
CPU = "generic";
- InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createAArch64MCSubtargetInfoImpl(TT, CPU, FS);
}
-static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
+static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitAArch64MCRegisterInfo(X, AArch64::LR);
return X;
@@ -75,11 +72,11 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createAArch64MCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
- Triple TheTriple(TT);
- assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) &&
+ assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) &&
"Only expect Darwin and ELF targets");
if (CM == CodeModel::Default)
@@ -94,7 +91,7 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
"Only small and large code models are allowed on AArch64");
// AArch64 Darwin is always PIC.
- if (TheTriple.isOSDarwin())
+ if (TT.isOSDarwin())
RM = Reloc::PIC_;
// On ELF platforms the default static relocation model has a smart enough
// linker to cope with referencing external symbols defined in a shared
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 569ad3844b25..ef8ef6268548 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -98,6 +98,16 @@ def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
"true",
"Enable SI load/store optimizer pass">;
+// Performance debugging feature. Allow using DS instruction immediate
+// offsets even if the base pointer can't be proven to be a base. On SI,
+// base pointer values that won't give the same result as a 16-bit add
+// are not safe to fold, but this will override the conservative test
+// for the base pointer.
+def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding",
+ "EnableUnsafeDSOffsetFolding",
+ "true",
+ "Force using DS instruction immediate offsets on SI">;
+
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"FlatAddressSpace",
"true",
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 0b426bc63dd5..ad267d350850 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -22,7 +22,6 @@ using namespace llvm;
namespace {
class AMDGPUAlwaysInline : public ModulePass {
-
static char ID;
public:
@@ -36,10 +35,9 @@ public:
char AMDGPUAlwaysInline::ID = 0;
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+ std::vector<Function *> FuncsToClone;
- std::vector<Function*> FuncsToClone;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
+ for (Function &F : M) {
if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
!F.hasFnAttribute(Attribute::NoInline))
FuncsToClone.push_back(&F);
@@ -49,12 +47,11 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
ValueToValueMapTy VMap;
Function *NewFunc = CloneFunction(F, VMap, false);
NewFunc->setLinkage(GlobalValue::InternalLinkage);
- F->getParent()->getFunctionList().push_back(NewFunc);
+ M.getFunctionList().push_back(NewFunc);
F->replaceAllUsesWith(NewFunc);
}
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
+ for (Function &F : M) {
if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
F.addFnAttr(Attribute::AlwaysInline);
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index df4461eac4db..37b77d778d9f 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -110,8 +110,11 @@ private:
SDValue &Offset, SDValue &GLC) const;
SDNode *SelectAddrSpaceCast(SDNode *N);
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Omod) const;
@@ -859,7 +862,8 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
(OffsetBits == 8 && !isUInt<8>(Offset)))
return false;
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
+ Subtarget->unsafeDSOffsetFoldingEnabled())
return true;
// On Southern Islands instruction with a negative base value and an offset
@@ -1316,6 +1320,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ bool Res = SelectVOP3Mods(In, Src, SrcMods);
+ return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
@@ -1327,6 +1337,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods, SDValue &Clamp,
+ SDValue &Omod) const {
+ bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
+
+ return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
+ cast<ConstantSDNode>(Clamp)->isNullValue() &&
+ cast<ConstantSDNode>(Omod)->isNullValue();
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Omod) const {
@@ -1351,18 +1371,14 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
do {
IsModified = false;
// Go over all selected nodes and try to fold them a bit more
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
-
- SDNode *Node = I;
-
- MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ for (SDNode &Node : CurDAG->allnodes()) {
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
if (!MachineNode)
continue;
SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
- if (ResNode != Node) {
- ReplaceUses(Node, ResNode);
+ if (ResNode != &Node) {
+ ReplaceUses(&Node, ResNode);
IsModified = true;
}
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d56838ec2019..3a65f3b56146 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -406,6 +406,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+ setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
@@ -444,7 +445,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
// Target Information
//===----------------------------------------------------------------------===//
-MVT AMDGPUTargetLowering::getVectorIdxTy() const {
+MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
return MVT::i32;
}
@@ -545,9 +546,8 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
}
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
- const DataLayout *DL = getDataLayout();
- unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
- unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
+ unsigned SrcSize = Src->getScalarSizeInBits();
+ unsigned DestSize = Dest->getScalarSizeInBits();
return SrcSize == 32 && DestSize == 64;
}
@@ -697,7 +697,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
const SDValue &InitPtr,
SDValue Chain,
SelectionDAG &DAG) const {
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
SDLoc DL(InitPtr);
Type *InitTy = Init->getType();
@@ -705,20 +705,20 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
EVT VT = EVT::getEVT(InitTy);
PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(InitTy));
+ MachinePointerInfo(UndefValue::get(PtrTy)), false,
+ false, TD.getPrefTypeAlignment(InitTy));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
EVT VT = EVT::getEVT(CFP->getType());
PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(CFP->getType()));
+ MachinePointerInfo(UndefValue::get(PtrTy)), false,
+ false, TD.getPrefTypeAlignment(CFP->getType()));
}
if (StructType *ST = dyn_cast<StructType>(InitTy)) {
- const StructLayout *SL = TD->getStructLayout(ST);
+ const StructLayout *SL = TD.getStructLayout(ST);
EVT PtrVT = InitPtr.getValueType();
SmallVector<SDValue, 8> Chains;
@@ -745,7 +745,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
else
llvm_unreachable("Unexpected type");
- unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
+ unsigned EltSize = TD.getTypeAllocSize(SeqTy->getElementType());
SmallVector<SDValue, 8> Chains;
for (unsigned i = 0; i < NumElements; ++i) {
SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
@@ -762,8 +762,8 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
EVT VT = EVT::getEVT(InitTy);
PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(InitTy));
+ MachinePointerInfo(UndefValue::get(PtrTy)), false,
+ false, TD.getPrefTypeAlignment(InitTy));
}
Init->dump();
@@ -785,7 +785,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
- const DataLayout *TD = getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
@@ -801,7 +801,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
unsigned Offset;
if (MFI->LocalMemoryObjects.count(GV) == 0) {
- uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
Offset = MFI->LDSSize;
MFI->LocalMemoryObjects[GV] = Offset;
// XXX: Account for alignment?
@@ -811,16 +811,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
}
return DAG.getConstant(Offset, SDLoc(Op),
- getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
+ getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS));
}
case AMDGPUAS::CONSTANT_ADDRESS: {
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
Type *EltType = GV->getType()->getElementType();
- unsigned Size = TD->getTypeAllocSize(EltType);
- unsigned Alignment = TD->getPrefTypeAlignment(EltType);
+ unsigned Size = DL.getTypeAllocSize(EltType);
+ unsigned Alignment = DL.getPrefTypeAlignment(EltType);
- MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
- MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
+ MVT PrivPtrVT = getPointerTy(DL, AMDGPUAS::PRIVATE_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
@@ -1653,7 +1653,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
// fb = fabs(fb);
fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// int cv = fr >= fb;
SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
@@ -1960,7 +1960,8 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
@@ -2020,7 +2021,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
@@ -2051,7 +2053,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
@@ -2081,7 +2084,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const
SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
@@ -2100,8 +2104,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
-
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
@@ -2172,7 +2176,8 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
@@ -2411,6 +2416,33 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SN->getBasePtr(), SN->getMemOperand());
}
+SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // i64 (shl x, 32) -> (build_pair 0, x)
+
+ // Doing this with moves theoretically helps MI optimizations that understand
+ // copies. 2 v_mov_b32_e32 will have the same code size / cycle count as
+ // v_lshl_b64. In the SALU case, I think this is slightly worse since it
+ // doubles the code size and I'm unsure about cycle count.
+ const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS || RHS->getZExtValue() != 32)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+
+ SDLoc SL(N);
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Extract low 32-bits.
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo);
+}
+
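[Annotation, not part of the patch: a concrete instance of the new fold, written as plain C++ rather than SelectionDAG nodes, with illustrative values:]

    #include <cstdint>

    // For a 64-bit shift by exactly 32, the result's low half is zero and its
    // high half is the low 32 bits of x -- exactly what
    // build_pair(i32 0, i32 trunc(x)) produces (BUILD_PAIR takes (lo, hi)).
    uint64_t shlBy32(uint64_t X) { return X << 32; }
    uint64_t asBuildPair(uint64_t X) {
      uint32_t Lo = 0;                        // low 32 bits of the result
      uint32_t Hi = static_cast<uint32_t>(X); // ISD::TRUNCATE of x
      return (uint64_t(Hi) << 32) | Lo;
    }
    // shlBy32(0x00000000AABBCCDDu) == asBuildPair(0x00000000AABBCCDDu)
    //                              == 0xAABBCCDD00000000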
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
@@ -2448,17 +2480,24 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc DL(N);
switch(N->getOpcode()) {
- default: break;
- case ISD::MUL:
- return performMulCombine(N, DCI);
- case AMDGPUISD::MUL_I24:
- case AMDGPUISD::MUL_U24: {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- simplifyI24(N0, DCI);
- simplifyI24(N1, DCI);
- return SDValue();
- }
+ default:
+ break;
+ case ISD::SHL: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ return performShlCombine(N, DCI);
+ }
+ case ISD::MUL:
+ return performMulCombine(N, DCI);
+ case AMDGPUISD::MUL_I24:
+ case AMDGPUISD::MUL_U24: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ simplifyI24(N0, DCI);
+ simplifyI24(N1, DCI);
+ return SDValue();
+ }
case ISD::SELECT: {
SDValue Cond = N->getOperand(0);
if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
@@ -2644,6 +2683,18 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getRegister(VirtualRegister, VT);
}
+uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
+ const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
+ uint64_t ArgOffset = MFI->ABIArgOffset;
+ switch (Param) {
+ case GRID_DIM:
+ return ArgOffset;
+ case GRID_OFFSET:
+ return ArgOffset + 4;
+ }
+ llvm_unreachable("unexpected implicit parameter type");
+}
+
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index fbb7d3c88437..478b2035fd75 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -65,6 +65,7 @@ private:
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
protected:
@@ -123,7 +124,7 @@ public:
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
- MVT getVectorIdxTy() const override;
+ MVT getVectorIdxTy(const DataLayout &) const override;
bool isSelectSupported(SelectSupportKind) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
@@ -207,6 +208,16 @@ public:
virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const;
+
+ enum ImplicitParameter {
+ GRID_DIM,
+ GRID_OFFSET
+ };
+
+ /// \brief Helper function that returns the byte offset of the given
+ /// type of implicit parameter.
+ unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
+ const ImplicitParameter Param) const;
};
namespace AMDGPUISD {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0779d1d786b2..bd5abc4f546e 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -69,6 +69,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
+ EnableUnsafeDSOffsetFolding(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 30f50eb1d2f3..90831bfb4458 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -76,6 +76,7 @@ private:
bool EnablePromoteAlloca;
bool EnableIfCvt;
bool EnableLoadStoreOpt;
+ bool EnableUnsafeDSOffsetFolding;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
@@ -222,6 +223,10 @@ public:
return EnableLoadStoreOpt;
}
+ bool unsafeDSOffsetFoldingEnabled() const {
+ return EnableUnsafeDSOffsetFolding;
+ }
+
unsigned getWavefrontSize() const {
return WavefrontSize;
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a9a911a8efed..2297b52b423c 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -156,8 +156,10 @@ public:
} // End of anonymous namespace
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); });
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(
+ AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
+ });
}
void AMDGPUPassConfig::addIRPasses() {
@@ -269,6 +271,7 @@ void GCNPassConfig::addPreRegAlloc() {
// also need extra copies to the address operand to be eliminated.
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ insertPass(&MachineSchedulerID, &RegisterCoalescerID);
}
addPass(createSIShrinkInstructionsPass(), false);
addPass(createSIFixSGPRLiveRangesPass(), false);
@@ -280,10 +283,10 @@ void GCNPassConfig::addPostRegAlloc() {
}
void GCNPassConfig::addPreSched2() {
- addPass(createSIInsertWaits(*TM), false);
}
void GCNPassConfig::addPreEmitPass() {
+ addPass(createSIInsertWaits(*TM), false);
addPass(createSILowerControlFlowPass(*TM), false);
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 791c84e6f28b..dee0a69d1e68 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -37,8 +37,9 @@ class AMDGPUTTIImpl : public BasicTTIImplBase<AMDGPUTTIImpl> {
const AMDGPUTargetLowering *getTLI() const { return TLI; }
public:
- explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM)
- : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL)
+ : BaseT(TM, DL), ST(TM->getSubtargetImpl()),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
@@ -46,18 +47,6 @@ public:
AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- AMDGPUTTIImpl &operator=(const AMDGPUTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- AMDGPUTTIImpl &operator=(AMDGPUTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
bool hasBranchDivergence() { return true; }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 7172e4bb9335..c709741f3777 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -44,7 +44,7 @@ static MCInstrInfo *createAMDGPUMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitAMDGPUMCRegisterInfo(X, 0);
return X;
@@ -52,14 +52,13 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo * X = new MCSubtargetInfo();
- InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS);
}
-static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
X->initMCCodeGenInfo(RM, CM, OL);
return X;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 8357b6d9d0ed..4e4d554f0ee7 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -815,8 +815,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
- case Intrinsic::AMDGPU_read_workdim:
- return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+ case Intrinsic::AMDGPU_read_workdim: {
+ uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
+ return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
+ }
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
@@ -897,8 +899,9 @@ SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
for (unsigned i = 0, e = VecVT.getVectorNumElements();
i != e; ++i) {
- Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
- DAG.getConstant(i, DL, getVectorIdxTy())));
+ Args.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
+ DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
}
return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
@@ -1459,22 +1462,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
- SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
- if (Ret.getNode()) {
- SDValue Ops[2] = {
- Ret,
- Chain
- };
- return DAG.getMergeValues(Ops, DL);
- }
+ if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
+ return Ret;
// Lower constant address space global variable loads
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
isa<GlobalVariable>(GetUnderlyingObject(
- LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
+ LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {
- SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
- getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
+ SDValue Ptr = DAG.getZExtOrTrunc(
+ LoadNode->getBasePtr(), DL,
+ getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS));
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(2, DL, MVT::i32));
return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
@@ -1702,7 +1700,8 @@ SDValue R600TargetLowering::LowerFormalArguments(
return Chain;
}
-EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index c06d3c4fd309..4dbac97af2a1 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -38,7 +38,9 @@ public:
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
- EVT getSetCCResultType(LLVMContext &, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const override;
+
private:
unsigned Gen;
/// Each OpenCL kernel has nine implicit parameters that are stored in the
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index d14e37a64612..c2887255cc11 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -126,11 +126,42 @@ static bool updateOperand(FoldCandidate &Fold,
return false;
}
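+// Returns true if \p FoldList already contains a fold into \p MI.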
+static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
+ const MachineInstr *MI) {
+ for (auto Candidate : FoldList) {
+ if (Candidate.UseMI == MI)
+ return true;
+ }
+ return false;
+}
+
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold,
const SIInstrInfo *TII) {
if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
+
+ // Special case for v_mac_f32_e64 if we are trying to fold into src2
+ unsigned Opc = MI->getOpcode();
+ if (Opc == AMDGPU::V_MAC_F32_e64 &&
+ (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
+ // Check if changing this to a v_mad_f32 instruction will allow us to
+ // fold the operand.
+ MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
+ bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
+ if (FoldAsMAD) {
+ MI->untieRegOperand(OpNo);
+ return true;
+ }
+ MI->setDesc(TII->get(Opc));
+ }
+
+ // If we are already folding into another operand of MI, then
+ // we can't commute the instruction, otherwise we risk making the
+ // other fold illegal.
+ if (isUseMIInFoldList(FoldList, MI))
+ return false;
+
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
unsigned CommuteIdx0;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index ead1a3743473..dd818a9ba746 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -254,8 +254,9 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
return false;
}
-bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty, unsigned AS) const {
+bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
@@ -416,7 +417,7 @@ static EVT toIntegerVT(EVT VT) {
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
- const DataLayout *DL = getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
@@ -425,16 +426,16 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
DAG.getConstant(Offset, SL, PtrVT));
- SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+ SDValue PtrOffset = DAG.getUNDEF(PtrVT);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
- unsigned Align = DL->getABITypeAlignment(Ty);
+ unsigned Align = DL.getABITypeAlignment(Ty);
if (VT != MemVT && VT.isFloatingPoint()) {
// Do an integer load and convert.
@@ -451,7 +452,12 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
true, // isNonTemporal
true, // isInvariant
Align); // Alignment
- return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load);
+ SDValue Ops[] = {
+ DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
+ Load.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, SL);
}
ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
@@ -569,6 +575,8 @@ SDValue SITargetLowering::LowerFormalArguments(
AnalyzeFormalArguments(CCInfo, Splits);
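+ // Chains of the kernel argument loads are collected here and merged into the
+ // chain returned at the end of this function.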
+ SmallVector<SDValue, 16> Chains;
+
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
@@ -587,8 +595,9 @@ SDValue SITargetLowering::LowerFormalArguments(
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contain information about
// thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
+ SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
Offset, Ins[i].Flags.isSExt());
+ Chains.push_back(Arg.getValue(1));
const PointerType *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
@@ -614,7 +623,8 @@ SDValue SITargetLowering::LowerFormalArguments(
Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
&AMDGPU::SReg_64RegClass);
Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
- InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ InVals.push_back(Copy);
continue;
}
@@ -634,7 +644,9 @@ SDValue SITargetLowering::LowerFormalArguments(
for (unsigned j = 1; j != NumElements; ++j) {
Reg = ArgLocs[ArgIdx++].getLocReg();
Reg = MF.addLiveIn(Reg, RC);
- Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ Regs.push_back(Copy);
}
// Fill up the missing vector elements
@@ -653,7 +665,11 @@ SDValue SITargetLowering::LowerFormalArguments(
AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
}
- return Chain;
+
+ if (Chains.empty())
+ return Chain;
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
@@ -695,14 +711,15 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return true;
}
-EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const {
+EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
+ EVT VT) const {
if (!VT.isVector()) {
return MVT::i1;
}
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
}
-MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT) const {
return MVT::i32;
}
@@ -888,7 +905,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDLoc DL(GSD);
const GlobalValue *GV = GSD->getGlobal();
- MVT PtrVT = getPointerTy(GSD->getAddressSpace());
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
@@ -926,6 +943,7 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ auto MFI = MF.getInfo<SIMachineFunctionInfo>();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
@@ -964,8 +982,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::AMDGPU_read_workdim:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
- false);
+ getImplicitParameterOffset(MFI, GRID_DIM), false);
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
@@ -1213,7 +1230,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
@@ -1411,7 +1429,7 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
unsigned AS = Load->getAddressSpace();
unsigned Align = Load->getAlignment();
Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
// Don't try to replace the load if we have to expand it due to alignment
// problems. Otherwise we will end up scalarizing the load, and trying to
@@ -2212,9 +2230,8 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint_,
+ StringRef Constraint,
MVT VT) const {
- StringRef Constraint(Constraint_);
if (Constraint == "r") {
switch(VT.SimpleTy) {
default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index a956b013bdb1..635b4edc89de 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -62,8 +62,8 @@ public:
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
EVT /*VT*/) const override;
- bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
@@ -90,8 +90,9 @@ public:
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const override;
bool enableAggressiveFMAFusion(EVT VT) const override;
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- MVT getScalarShiftAmountTy(EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
@@ -114,9 +115,9 @@ public:
SDLoc DL,
SDValue Ptr) const;
- std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI,
- const std::string &Constraint, MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
};
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index eb96bd0227b2..18910615bebe 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -227,9 +227,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
uint8_t Offset0 = Offset0Imm->getImm();
uint8_t Offset1 = Offset1Imm->getImm();
- assert(Offset1 > Offset0);
- if (Offset1 - Offset0 == 1) {
+ if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
// Each of these offsets is in element sized units, so we need to convert
// to bytes of the individual reads.
@@ -924,7 +923,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
return false;
unsigned Opc = UseMI->getOpcode();
- if (Opc == AMDGPU::V_MAD_F32) {
+ if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.
if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
@@ -963,9 +962,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
// instead of having to modify in place.
// Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));
unsigned Src1Reg = Src1->getReg();
@@ -980,6 +979,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
Src1->setSubReg(Src2SubReg);
Src1->setIsKill(Src2->isKill());
+ if (Opc == AMDGPU::V_MAC_F32_e64) {
+ UseMI->untieRegOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ }
+
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src2));
+ // ChangeToImmediate() adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
removeModOperands(*UseMI);
@@ -1010,11 +1017,17 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
// instead of having to modify in place.
// Remove these first since they are at the end.
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));
+ if (Opc == AMDGPU::V_MAC_F32_e64) {
+ UseMI->untieRegOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ }
+
+ // ChangeToImmediate() adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
// These come before src2.
@@ -1126,6 +1139,38 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
return false;
}
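+// Convert the two-address V_MAC_F32 (dst tied to src2) into the
+// three-address V_MAD_F32 form.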
+MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
+ MachineBasicBlock::iterator &MI,
+ LiveVariables *LV) const {
+
+ switch (MI->getOpcode()) {
+ default: return nullptr;
+ case AMDGPU::V_MAC_F32_e64: break;
+ case AMDGPU::V_MAC_F32_e32: {
+ const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
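+ // VOP3 encodings can't take a literal constant, so bail if src0 is an
+ // immediate that is not an inline constant.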
+ if (Src0->isImm() && !isInlineConstant(*Src0, 4))
+ return nullptr;
+ break;
+ }
+ }
+
+ const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst);
+ const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);
+
+ return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
+ .addOperand(*Dst)
+ .addImm(0) // Src0 mods
+ .addOperand(*Src0)
+ .addImm(0) // Src1 mods
+ .addOperand(*Src1)
+ .addImm(0) // Src2 mods
+ .addOperand(*Src2)
+ .addImm(0) // clamp
+ .addImm(0); // omod
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
int64_t SVal = Imm.getSExtValue();
if (SVal >= -16 && SVal <= 64)
@@ -1625,7 +1670,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
if (MO->isReg()) {
assert(DefinedRC);
- const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg());
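+ // MRI.getRegClass() only works for virtual registers; query the register
+ // info directly for physical registers.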
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isVirtualRegister(MO->getReg()) ?
+ MRI.getRegClass(MO->getReg()) :
+ RI.getPhysRegClass(MO->getReg());
// In order to be legal, the common sub-class must be equal to the
// class of the current operand. For example:
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 0382272068d2..015ea12d4598 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -144,6 +144,10 @@ public:
unsigned getMachineCSELookAheadLimit() const override { return 500; }
+ MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
+ MachineBasicBlock::iterator &MI,
+ LiveVariables *LV) const override;
+
bool isSALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index fcb58d5da3b0..b39a78714640 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -529,9 +529,11 @@ def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
+def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
+def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
//===----------------------------------------------------------------------===//
// SI assembler operands
@@ -1113,6 +1115,13 @@ def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
field string Asm = "$dst, $src0, $vsrc1, $src2";
}
+def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
+ let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
+ HasModifiers>.ret;
+ let Asm32 = getAsm32<2>.ret;
+ let Asm64 = getAsm64<2, HasModifiers>.ret;
+}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 8c8d836776db..1ee63c675822 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1488,7 +1488,10 @@ defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
-defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
+let Constraints = "$dst = $src2", DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1 in {
+defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>;
+}
} // End isCommutable = 1
defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
@@ -2206,6 +2209,15 @@ def : Pat <
(V_CNDMASK_B32_e64 $src2, $src1, $src0)
>;
+// Pattern for V_MAC_F32
+def : Pat <
+ (fmad (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3NoMods f32:$src1, i32:$src1_modifiers),
+ (VOP3NoMods f32:$src2, i32:$src2_modifiers)),
+ (V_MAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+ $src2_modifiers, $src2, $clamp, $omod)
+>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 9b1d256dc5a8..1bdb1f0ee9f9 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -214,12 +214,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
// cases, like vectors of pointers.
const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
- unsigned DestReg1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg();
+ const MachineOperand *Dest0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst);
+ const MachineOperand *Dest1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst);
unsigned Offset0
- = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
+ = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
unsigned Offset1
= TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
@@ -258,20 +257,43 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
- updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
- updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
- LIS->RemoveMachineInstrFromMaps(I);
- // Replacing Paired in the maps with Read2 allows us to avoid updating the
- // live range for the m0 register.
- LIS->ReplaceMachineInstrInMaps(Paired, Read2);
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+
+ // Copy to the old destination registers.
+ MachineInstr *Copy0 = BuildMI(*MBB, I, DL, CopyDesc)
+ .addOperand(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ MachineInstr *Copy1 = BuildMI(*MBB, I, DL, CopyDesc)
+ .addOperand(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
+
+ LIS->InsertMachineInstrInMaps(Read2);
+
+ // repairLiveintervalsInRange() doesn't handle physical registers, so we have
+ // to update the M0 range manually.
+ SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
+ LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
+ LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
+ bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
+
+ // The new write to the original destination register is now the copy. Steal
+ // the old SlotIndex.
+ LIS->ReplaceMachineInstrInMaps(I, Copy0);
+ LIS->ReplaceMachineInstrInMaps(Paired, Copy1);
+
I->eraseFromParent();
Paired->eraseFromParent();
LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
LIS->shrinkToUses(&AddrRegLI);
- LIS->getInterval(DestReg); // Create new LI
+ LIS->createAndComputeVirtRegInterval(DestReg);
+
+ if (UpdateM0Range) {
+ SlotIndex Read2Index = LIS->getInstructionIndex(Read2);
+ M0Segment->end = Read2Index.getRegSlot();
+ }
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
return Read2.getInstr();
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 587ea63d6796..d23b92edef33 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -53,7 +53,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
- MRI.setPhysRegUsed(LaneVGPR);
// Add this register as live-in to all blocks to avoid machine verifier
// complaining about use of an undefined physical register.
diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
index 0a7f684552f0..b086d2ed6652 100644
--- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
@@ -91,7 +91,6 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
if (ScratchOffsetReg != AMDGPU::NoRegister) {
// Found an SGPR to use
- MRI.setPhysRegUsed(ScratchOffsetReg);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
.addReg(ScratchOffsetPreloadReg);
} else {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index db2ff0b1f952..ce4acafac9fa 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -499,7 +499,7 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (!MRI.isPhysRegUsed(*I))
+ if (MRI.reg_nodbg_empty(*I))
return *I;
}
return AMDGPU::NoRegister;
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 51e72cdb5f9e..5d00bdd6a9bb 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -94,8 +94,20 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
// is vcc. We should handle this the same way we handle vopc, by adding
// a register allocation hint pre-regalloc and then doing the shrinking
// post-regalloc.
- if (Src2)
- return false;
+ if (Src2) {
+ switch (MI.getOpcode()) {
+ default: return false;
+
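+ // The VOP2 form of V_MAC_F32 reads src2 as a VGPR tied to the destination
+ // and has no modifier bits, so only a plain VGPR src2 can be shrunk.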
+ case AMDGPU::V_MAC_F32_e64:
+ if (!isVGPR(Src2, TRI, MRI) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
+ return false;
+ break;
+
+ case AMDGPU::V_CNDMASK_B32_e64:
+ break;
+ }
+ }
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
const MachineOperand *Src1Mod =
@@ -149,7 +161,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
return;
// Try to fold Src0
- if (Src0.isReg()) {
+ if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
unsigned Reg = Src0.getReg();
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
if (Def && Def->isMoveImmediate()) {
@@ -243,6 +255,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
+ // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
+ // instructions.
+ const MachineOperand *Src2 =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (!Src2->isReg())
+ continue;
+ unsigned SReg = Src2->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
+ continue;
+ }
+ if (SReg != AMDGPU::VCC)
+ continue;
+ }
+
// We can shrink this instruction
DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
@@ -259,6 +287,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (Src1)
Inst32.addOperand(*Src1);
+ const MachineOperand *Src2 =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2)
+ Inst32.addOperand(*Src2);
+
++NumInstructionsShrunk;
MI.eraseFromParent();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 96b4742da2bb..ef609a66d032 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -150,6 +150,10 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
+ "Generate calls via indirect call "
+ "instructions">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b1a11d626bda..9f43e732bd73 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1230,8 +1230,7 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
Reloc::Model RM = MF.getTarget().getRelocationModel();
if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
- assert(getSubtarget().getTargetTriple().getObjectFormat() ==
- Triple::MachO &&
+ assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
"LOAD_STACK_GUARD currently supported only for MachO.");
expandLoadStackGuard(MI, RM);
MI->getParent()->erase(MI);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 3f79a9b53d70..e7d5be7753e4 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -127,7 +127,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- const TargetFrameLowering *TFI = STI.getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -194,7 +194,7 @@ unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- const TargetFrameLowering *TFI = STI.getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
switch (RC->getID()) {
default:
@@ -302,7 +302,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
@@ -333,6 +333,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
// We can't realign the stack if:
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
@@ -347,7 +348,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
- if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF))
+ if (TFI->hasReservedCallFrame(MF))
return true;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
@@ -357,9 +358,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
const Function *F = MF.getFunction();
- unsigned StackAlign =
- MF.getSubtarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
@@ -378,7 +379,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- const TargetFrameLowering *TFI = STI.getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
if (TFI->hasFP(MF))
return getFramePointerReg(STI);
@@ -517,7 +518,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -694,8 +695,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 7dd21ecbe91b..27cf06b995a0 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -142,6 +142,9 @@ def CC_ARM_AAPCS : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
+ // The 'nest' parameter, if any, is passed in R12.
+ CCIfNest<CCAssignToReg<[R12]>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4175b4af86e6..fdd0763ea608 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -49,8 +49,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-extern cl::opt<bool> EnableARMLongCalls;
-
namespace {
// All possible address modes, plus some.
@@ -685,7 +683,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
}
unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple()) return 0;
@@ -732,7 +730,7 @@ unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
}
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
- EVT evt = TLI.getValueType(Ty, true);
+ EVT evt = TLI.getValueType(DL, Ty, true);
// Only handle simple types.
if (evt == MVT::Other || !evt.isSimple()) return false;
@@ -786,12 +784,13 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
return ARMComputeAddress(U->getOperand(0), Addr);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return ARMComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return ARMComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::GetElementPtr: {
@@ -1365,7 +1364,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt) {
Type *Ty = Src1Value->getType();
- EVT SrcEVT = TLI.getValueType(Ty, true);
+ EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
MVT SrcVT = SrcEVT.getSimpleVT();
@@ -1557,7 +1556,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
@@ -1750,7 +1749,7 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
}
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
// We can get here in the case when we have a binary operation on a non-legal
// type and the target independent selector doesn't know how to handle it.
@@ -1790,7 +1789,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
- EVT FPVT = TLI.getValueType(I->getType(), true);
+ EVT FPVT = TLI.getValueType(DL, I->getType(), true);
if (!FPVT.isSimple()) return false;
MVT VT = FPVT.getSimpleVT();
@@ -2095,7 +2094,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2122,7 +2121,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVEVT = TLI.getValueType(RV->getType());
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple()) return false;
MVT RVVT = RVEVT.getSimpleVT();
MVT DestVT = VA.getValVT();
@@ -2173,7 +2172,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
// Manually compute the global's type to avoid building it when unnecessary.
Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
- EVT LCREVT = TLI.getValueType(GVTy);
+ EVT LCREVT = TLI.getValueType(DL, GVTy);
if (!LCREVT.isSimple()) return 0;
GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
@@ -2246,19 +2245,19 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return false;
unsigned CalleeReg = 0;
- if (EnableARMLongCalls) {
+ if (Subtarget->genLongCalls()) {
CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
if (CalleeReg == 0) return false;
}
// Issue the call.
- unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
+ unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(CallOpc));
// BL / BLX don't take a predicate, but tBL / tBLX do.
if (isThumb2)
AddDefaultPred(MIB);
- if (EnableARMLongCalls)
+ if (Subtarget->genLongCalls())
MIB.addReg(CalleeReg);
else
MIB.addExternalSymbol(TLI.getLibcallName(Call));
@@ -2380,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
bool UseReg = false;
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV || EnableARMLongCalls) UseReg = true;
+ if (!GV || Subtarget->genLongCalls()) UseReg = true;
unsigned CalleeReg = 0;
if (UseReg) {
@@ -2576,8 +2575,8 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
Value *Op = I->getOperand(0);
EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(Op->getType(), true);
- DestVT = TLI.getValueType(I->getType(), true);
+ SrcVT = TLI.getValueType(DL, Op->getType(), true);
+ DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
@@ -2742,8 +2741,8 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
- SrcEVT = TLI.getValueType(SrcTy, true);
- DestEVT = TLI.getValueType(DestTy, true);
+ SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple()) return false;
if (!DestEVT.isSimple()) return false;
@@ -2763,7 +2762,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
return false;
// Only handle i32 now.
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (DestVT != MVT::i32)
return false;
@@ -3026,7 +3025,7 @@ bool ARMFastISel::fastLowerArguments() {
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
- EVT ArgVT = TLI.getValueType(ArgTy);
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
if (!ArgVT.isSimple()) return false;
switch (ArgVT.getSimpleVT().SimpleTy) {
case MVT::i8:
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a52e49780e27..6744000afe2b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -800,7 +800,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// This is bad: if an interrupt is taken after the mov, sp is in an
// inconsistent state.
// Use the first callee-saved register as a scratch register.
- assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
@@ -1470,7 +1470,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
-static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+static void
+checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
if (!SpillAlignedNEONRegs)
return;
@@ -1497,10 +1498,9 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
// callee-saved registers in order, but it can happen that there are holes in
// the range. Registers above the hole will be spilled to the standard DPRCS
// area.
- MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumSpills = 0;
for (; NumSpills < 8; ++NumSpills)
- if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
+ if (!SavedRegs.test(ARM::D8 + NumSpills))
break;
// Don't do this for just one d-register. It's not worth it.
@@ -1511,12 +1511,13 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
// A scratch register is required for the vst1 / vld1 instructions.
- MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ SavedRegs.set(ARM::R4);
}
-void
-ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
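+ // The base implementation marks every callee-saved register that is
+ // modified in the function.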
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
// This tells PEI to spill the FP as if it is any other callee-save register
// to take advantage the eliminateFrameIndex machinery. This also ensures it
// is spilled in the order specified by getCalleeSavedRegs() to make it easier
@@ -1543,12 +1544,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
if (AFI->isThumb2Function() &&
(MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
- MRI.setPhysRegUsed(ARM::R4);
+ SavedRegs.set(ARM::R4);
if (AFI->isThumb1OnlyFunction()) {
// Spill LR if Thumb1 function uses variable length argument lists.
if (AFI->getArgRegsSaveSize() > 0)
- MRI.setPhysRegUsed(ARM::LR);
+ SavedRegs.set(ARM::LR);
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
// for sure what the stack size will be, but for this, an estimate is good
@@ -1558,23 +1559,23 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: It will be better just to find spare register here.
unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
- MRI.setPhysRegUsed(ARM::R4);
+ SavedRegs.set(ARM::R4);
}
// See if we can spill vector registers to aligned stack.
- checkNumAlignedDPRCS2Regs(MF);
+ checkNumAlignedDPRCS2Regs(MF, SavedRegs);
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
- MRI.setPhysRegUsed(RegInfo->getBaseRegister());
+ SavedRegs.set(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
- // by scanning the callee-save registers to see if any is used.
+ // by scanning the callee-save registers to see if any is modified.
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MRI.isPhysRegUsed(Reg)) {
+ if (SavedRegs.test(Reg)) {
Spilled = true;
CanEliminateFrame = false;
}
@@ -1668,7 +1669,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
// spill LR as well so we can fold BX_RET into the register restore (LDM).
if (!LRSpilled && CS1Spilled) {
- MRI.setPhysRegUsed(ARM::LR);
+ SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
@@ -1681,7 +1682,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (hasFP(MF)) {
- MRI.setPhysRegUsed(FramePtr);
+ SavedRegs.set(FramePtr);
auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
FramePtr);
if (FPPos != UnspilledCS1GPRs.end())
@@ -1700,7 +1701,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill high register if the function is thumb
if (!AFI->isThumbFunction() ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
- MRI.setPhysRegUsed(Reg);
+ SavedRegs.set(Reg);
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
break;
@@ -1708,7 +1709,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
unsigned Reg = UnspilledCS2GPRs.front();
- MRI.setPhysRegUsed(Reg);
+ SavedRegs.set(Reg);
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
}
@@ -1747,7 +1748,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (Extras.size() && NumExtras == 0) {
for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MRI.setPhysRegUsed(Extras[i]);
+ SavedRegs.set(Extras[i]);
}
} else if (!AFI->isThumb1OnlyFunction()) {
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
@@ -1761,7 +1762,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (ForceLRSpill) {
- MRI.setPhysRegUsed(ARM::LR);
+ SavedRegs.set(ARM::LR);
AFI->setLRIsSpilledForFarJump(true);
}
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index d763d17a506f..6fdc5eff5e47 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -54,8 +54,8 @@ public:
unsigned &FrameReg, int SPAdj) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
void adjustForSegmentedStacks(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 50afb192b331..b110628a0a86 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -533,7 +533,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -556,7 +557,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -702,7 +704,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -722,7 +725,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -900,7 +904,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
@@ -915,7 +920,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -964,7 +970,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -981,7 +988,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
ARM_AM::AddrOpc AddSub = ARM_AM::add;
@@ -1215,7 +1223,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
MachineFrameInfo *MFI = MF->getFrameInfo();
if (MFI->getObjectAlignment(FI) < 4)
MFI->setObjectAlignment(FI, 4);
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1237,7 +1246,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
MachineFrameInfo *MFI = MF->getFrameInfo();
if (MFI->getObjectAlignment(FI) < 4)
MFI->setObjectAlignment(FI, 4);
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -1285,7 +1295,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1314,7 +1325,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -1343,7 +1355,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
@@ -1438,7 +1451,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
@@ -2510,7 +2524,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (UseCP) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
- TLI->getPointerTy());
+ TLI->getPointerTy(CurDAG->getDataLayout()));
SDNode *ResNode;
if (Subtarget->isThumb()) {
@@ -2540,7 +2554,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ SDValue TFI = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
if (Subtarget->isThumb1Only()) {
// Set the alignment of the frame object to 4, to avoid having to generate
// more than one ADD
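The ARMISelDAGToDAG.cpp hunks above all apply one mechanical change: the pointer type is now derived from the module's DataLayout instead of being cached on TargetLowering. A minimal sketch of the new call pattern, for illustration only (the wrapper function below is not part of the patch):

    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/Target/TargetLowering.h"
    using namespace llvm;

    // Old form:  CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    // New form:  the pointer VT is looked up through the DataLayout owned by
    // the module, which SelectionDAG exposes via getDataLayout().
    static SDValue selectFrameIndex(SelectionDAG &CurDAG,
                                    const TargetLowering &TLI, int FI) {
      MVT PtrVT = TLI.getPointerTy(CurDAG.getDataLayout());
      return CurDAG.getTargetFrameIndex(FI, PtrVT);
    }
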
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4b2105b7442f..e335784f6d87 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -60,11 +60,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
-cl::opt<bool>
-EnableARMLongCalls("arm-long-calls", cl::Hidden,
- cl::desc("Generate calls via indirect call instructions"),
- cl::init(false));
-
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
@@ -548,6 +543,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ // NEON does not have single instruction CTTZ for vectors.
+ setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
+
// NEON only has FMA instructions as of VFP4.
if (!Subtarget->hasVFP4()) {
setOperationAction(ISD::FMA, MVT::v2f32, Expand);
@@ -1149,8 +1165,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
-EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return getPointerTy();
+EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
+ if (!VT.isVector())
+ return getPointerTy(DL);
return VT.changeVectorElementTypeToInteger();
}
@@ -1429,7 +1447,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, PtrOff);
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
@@ -1453,7 +1472,8 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
else {
assert(NextVA.isMemLoc());
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
+ getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
dl, DAG, NextVA,
@@ -1526,7 +1546,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getCALLSEQ_START(Chain,
DAG.getIntPtrConstant(NumBytes, dl, true), dl);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -1607,7 +1628,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned RegBegin, RegEnd;
CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
unsigned int i, j;
for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
@@ -1628,12 +1650,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (Flags.getByValSize() > 4*offset) {
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
- StkPtrOff);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
- SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
MVT::i32);
SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
@@ -1693,8 +1715,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isARMFunc = false;
bool isLocalARMFunc = false;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ auto PtrVt = getPointerTy(DAG.getDataLayout());
- if (EnableARMLongCalls) {
+ if (Subtarget->genLongCalls()) {
assert((Subtarget->isTargetWindows() ||
getTargetMachine().getRelocationModel() == Reloc::Static) &&
"long-calls with non-static relocation model!");
@@ -1709,12 +1732,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
// Get the address of the callee into a register
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(), false, false,
+ false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1724,29 +1746,28 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 0);
// Get the address of the callee into a register
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(), false, false,
+ false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
isDirect = true;
- bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
- bool isStub = (isExt && Subtarget->isTargetMachO()) &&
+ bool isDef = GV->isStrongDefinitionForLinker();
+ bool isStub = (!isDef && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
+ isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
- Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
- DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
- 0, ARMII::MO_NONLAZY));
- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ Callee = DAG.getNode(
+ ARMISD::WrapperPIC, dl, PtrVt,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(), false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
@@ -1754,20 +1775,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned TargetFlags = GV->hasDLLImportStorageClass()
? ARMII::MO_DLLIMPORT
: ARMII::MO_NO_FLAG;
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
- TargetFlags);
+ Callee =
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
if (GV->hasDLLImportStorageClass())
- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
- Callee), MachinePointerInfo::getGOT(),
- false, false, false, 0);
+ Callee =
+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
+ MachinePointerInfo::getGOT(), false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
@@ -1781,22 +1802,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 4);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(), false, false,
+ false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
- Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
- getPointerTy(), Callee, PICLabel);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
unsigned OpFlags = 0;
// On ELF targets for PIC code, direct calls should go through the PLT
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);
}
}
@@ -2433,7 +2452,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
SDLoc DL(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
@@ -2462,7 +2481,7 @@ SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
SDLoc dl(GA);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2508,7 +2527,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDLoc dl(GA);
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -2574,7 +2593,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
@@ -2617,7 +2636,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -2648,7 +2667,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const ARMII::TOF TargetFlags =
(GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
SDLoc DL(Op);
@@ -2672,7 +2691,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
@@ -2716,14 +2735,14 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
}
case Intrinsic::arm_thread_pointer: {
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
@@ -2820,7 +2839,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDLoc dl(Op);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
@@ -2850,7 +2869,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -2904,8 +2923,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
if (REnd != RBegin)
ArgOffset = -4 * (ARM::R4 - RBegin);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
SmallVector<SDValue, 4> MemOps;
const TargetRegisterClass *RC =
@@ -2918,8 +2938,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, dl, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
}
if (!MemOps.empty())
@@ -3013,6 +3032,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3035,7 +3055,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ArgValue2;
if (VA.isMemLoc()) {
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -3122,7 +3142,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
CurByValIndex, VA.getLocMemOffset(),
Flags.getByValSize());
- InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
CCInfo.nextInRegsParam();
} else {
unsigned FIOffset = VA.getLocMemOffset();
@@ -3130,7 +3150,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0));
@@ -3855,7 +3875,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Index = Op.getOperand(2);
SDLoc dl(Op);
- EVT PTy = getPointerTy();
+ EVT PTy = getPointerTy(DAG.getDataLayout());
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
@@ -4102,8 +4122,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", ARM::SP)
.Default(0);
@@ -4163,7 +4183,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn f64->i64 into VMOVRRD.
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
SDValue Cvt;
- if (TLI.isBigEndian() && SrcVT.isVector() &&
+ if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
SrcVT.getVectorNumElements() > 1)
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32),
@@ -4283,8 +4303,82 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ if (VT.isVector()) {
+ assert(ST->hasNEON());
+
+ // Compute the least significant set bit: LSB = X & -X
+ SDValue X = N->getOperand(0);
+ SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
+ SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
+
+ EVT ElemTy = VT.getVectorElementType();
+
+ if (ElemTy == MVT::i8) {
+ // Compute with: cttz(x) = ctpop(lsb - 1)
+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(1, dl, ElemTy));
+ SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
+ }
+
+ if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
+ (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
+ // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
+ unsigned NumBits = ElemTy.getSizeInBits();
+ SDValue WidthMinus1 =
+ DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
+ return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
+ }
+
+ // Compute with: cttz(x) = ctpop(lsb - 1)
+
+ // Since we can only compute the number of bits in a byte with vcnt.8, we
+ // have to gather the result with pairwise addition (vpaddl) for i16, i32,
+ // and i64.
+
+ // Compute LSB - 1.
+ SDValue Bits;
+ if (ElemTy == MVT::i64) {
+ // Load constant 0xffff'ffff'ffff'ffff to register.
+ SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(0x1eff, dl, MVT::i32));
+ Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
+ } else {
+ SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+ DAG.getTargetConstant(1, dl, ElemTy));
+ Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
+ }
+
+ // Count #bits with vcnt.8.
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
+ SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
+
+ // Gather the #bits with vpaddl (pairwise add.)
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+ SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt8);
+ if (ElemTy == MVT::i16)
+ return Cnt16;
+
+ EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
+ SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt16);
+ if (ElemTy == MVT::i32)
+ return Cnt32;
+
+ assert(ElemTy == MVT::i64);
+ SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
+ Cnt32);
+ return Cnt64;
+ }
if (!ST->hasV6T2Ops())
return SDValue();
@@ -4730,7 +4824,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}
- if (DAG.getTargetLoweringInfo().isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
// swap higher and lower 32 bit word
Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
@@ -5868,7 +5962,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
if (BVN->getValueType(0) != MVT::v4i32 ||
BVN->getOpcode() != ISD::BUILD_VECTOR)
return false;
- unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
unsigned HiElt = 1 - LoElt;
ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
@@ -6013,7 +6107,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
SDNode *BVN = N->getOperand(0).getNode();
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
- unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
}
@@ -6342,18 +6436,19 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
// Create stack object for sret.
- const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
- const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+ auto &DL = DAG.getDataLayout();
+ const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+ const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
- SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+ SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
ArgListTy Args;
ArgListEntry Entry;
@@ -6373,7 +6468,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
const char *LibcallName = (ArgVT == MVT::f64)
? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
@@ -6387,7 +6482,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(), false, false, false, 0);
// Address of cos field.
- SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
MachinePointerInfo(), false, false, false, 0);
@@ -6487,7 +6582,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
- case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
@@ -6845,9 +6941,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getDataLayout()->getTypeAllocSize(C->getType());
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -6935,9 +7031,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getDataLayout()->getTypeAllocSize(C->getType());
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
@@ -7313,9 +7409,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
+ unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
- Align = getDataLayout()->getTypeAllocSize(C->getType());
+ Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
if (IsThumb1)
@@ -8001,7 +8097,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Build operand list.
SmallVector<SDValue, 8> Ops;
Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
- TLI.getPointerTy()));
+ TLI.getPointerTy(DAG.getDataLayout())));
// Input is the vector.
Ops.push_back(Vec);
@@ -8681,7 +8777,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
- if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
+ if (DCI.DAG.getDataLayout().isBigEndian())
std::swap (NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
return Result;
@@ -9312,7 +9408,9 @@ static SDValue PerformSTORECombine(SDNode *N,
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i < NumElems; ++i)
- ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
+ ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
+ ? (i + 1) * SizeRatio - 1
+ : i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
@@ -9339,8 +9437,8 @@ static SDValue PerformSTORECombine(SDNode *N,
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, DL,
- TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue BasePtr = St->getBasePtr();
// Perform one or more big stores into memory.
@@ -9367,7 +9465,7 @@ static SDValue PerformSTORECombine(SDNode *N,
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
- bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
@@ -10078,7 +10176,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// For any little-endian targets with neon, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses
- if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+ if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
if (Fast)
*Fast = true;
return true;
@@ -10317,10 +10415,10 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
- EVT VT = getValueType(Ty, true);
+ EVT VT = getValueType(DL, Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
@@ -10664,7 +10762,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
-ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ARMTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
@@ -10723,10 +10821,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
}
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
-RCPair
-ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const {
+RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
@@ -10974,7 +11070,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
@@ -11083,7 +11179,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_neon_vld4lane: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
@@ -11103,12 +11200,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_neon_vst4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeAllocSize(ArgTy) / 8;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
@@ -11122,12 +11220,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
@@ -11135,12 +11234,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
case Intrinsic::arm_stlex:
case Intrinsic::arm_strex: {
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
@@ -11427,9 +11527,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
VectorType *VecTy = Shuffles[0]->getType();
Type *EltTy = VecTy->getVectorElementType();
- const DataLayout *DL = getDataLayout();
- unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
- bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
// Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
// support i64/f64 element).
@@ -11439,8 +11539,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
- VecTy = VectorType::get(DL->getIntPtrType(EltTy),
- VecTy->getVectorNumElements());
+ VecTy =
+ VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
@@ -11517,9 +11617,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Type *EltTy = VecTy->getVectorElementType();
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
- const DataLayout *DL = getDataLayout();
- unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
- bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
// Skip illegal sub vector types and vector types of i64/f64 element (vstN
// doesn't support i64/f64 element).
@@ -11533,7 +11633,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
- Type *IntTy = DL->getIntPtrType(EltTy);
+ Type *IntTy = DL.getIntPtrType(EltTy);
// Convert to the corresponding integer vector.
Type *IntVecTy =
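The NEON CTTZ lowering added in this file leans on the identity cttz(x) = popcount((x & -x) - 1), with the zero-undef flavour for i16/i32 elements computed as (width - 1) - ctlz(x & -x). A scalar sketch of the same arithmetic, purely illustrative and not part of the patch (uses the GCC/Clang builtins for the reference values):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    // cttz via popcount: isolating the lowest set bit (x & -x) and subtracting 1
    // leaves exactly cttz(x) one-bits below it, so the popcount of that mask is
    // the trailing-zero count. Works for x != 0; for x == 0 it yields the width.
    static unsigned cttzViaPopcount(uint32_t X) {
      uint32_t LSB = X & (0u - X);             // lowest set bit of X
      return std::bitset<32>(LSB - 1).count();
    }

    // Zero-undef variant used for i16/i32 elements: (width - 1) - ctlz(x & -x).
    static unsigned cttzViaCtlz(uint32_t X) {
      assert(X != 0 && "zero is undefined in this variant");
      uint32_t LSB = X & (0u - X);
      return 31u - static_cast<unsigned>(__builtin_clz(LSB));
    }

    int main() {
      for (uint32_t X : {1u, 2u, 12u, 0x40000000u, 0x80000000u}) {
        assert(cttzViaPopcount(X) == static_cast<unsigned>(__builtin_ctz(X)));
        assert(cttzViaCtlz(X) == static_cast<unsigned>(__builtin_ctz(X)));
      }
      return 0;
    }
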
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 74396392f8e3..efc9020c193a 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -249,7 +249,8 @@ namespace llvm {
}
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -286,8 +287,8 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -324,8 +325,7 @@ namespace llvm {
bool ExpandInlineAsm(CallInst *CI) const override;
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -334,8 +334,7 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -345,8 +344,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
else if (ConstraintCode.size() == 2) {
@@ -533,7 +532,8 @@ namespace llvm {
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index b8cac135baf6..61c45af26fe1 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -306,8 +306,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
-def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">;
-def IsBE : Predicate<"getTargetLowering()->isBigEndian()">;
+def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
+def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 245c9e869bf6..37352810c99f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -31,11 +31,13 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -65,12 +67,18 @@ namespace {
static char ID;
ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+ const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI;
- RegScavenger *RS;
+ LivePhysRegs LiveRegs;
+ RegisterClassInfo RegClassInfo;
+ MachineBasicBlock::const_iterator LiveRegPos;
+ bool LiveRegsValid;
+ bool RegClassInfoValid;
bool isThumb1, isThumb2;
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -80,64 +88,60 @@ namespace {
}
private:
+ /// A set of load/store MachineInstrs with same base register sorted by
+ /// offset.
struct MemOpQueueEntry {
- int Offset;
- unsigned Reg;
- bool isKill;
- unsigned Position;
- MachineBasicBlock::iterator MBBI;
- bool Merged;
- MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
- MachineBasicBlock::iterator i)
- : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ MachineInstr *MI;
+ int Offset; ///< Load/Store offset.
+ unsigned Position; ///< Position as counted from end of basic block.
+ MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
+ : MI(MI), Offset(Offset), Position(Position) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
- typedef MemOpQueue::iterator MemOpQueueIter;
- void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
- const MemOpQueue &MemOps, unsigned DefReg,
- unsigned RangeBegin, unsigned RangeEnd);
+ /// A set of MachineInstrs that fulfill (nearly all) conditions to get
+ /// merged into a LDM/STM.
+ struct MergeCandidate {
+ /// List of instructions ordered by load/store offset.
+ SmallVector<MachineInstr*, 4> Instrs;
+ /// Index in Instrs of the instruction being latest in the schedule.
+ unsigned LatestMIIdx;
+ /// Index in Instrs of the instruction being earliest in the schedule.
+ unsigned EarliestMIIdx;
+ /// Index into the basic block where the merged instruction will be
+ /// inserted. (See MemOpQueueEntry.Position)
+ unsigned InsertPos;
+ /// Whether the instructions can be merged into a ldm/stm instruction.
+ bool CanMergeToLSMulti;
+ /// Whether the instructions can be merged into a ldrd/strd instruction.
+ bool CanMergeToLSDouble;
+ };
+ SpecificBumpPtrAllocator<MergeCandidate> Allocator;
+ SmallVector<const MergeCandidate*,4> Candidates;
+ SmallVector<MachineInstr*,4> MergeBaseCandidates;
+
+ void moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before);
+ unsigned findFreeReg(const TargetRegisterClass &RegClass);
void UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base, unsigned WordOffset,
+ DebugLoc DL, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
- bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- DebugLoc dl,
- ArrayRef<std::pair<unsigned, bool> > Regs,
- ArrayRef<unsigned> ImpDefs);
- void MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &MemOps,
- unsigned memOpsBegin,
- unsigned memOpsEnd,
- unsigned insertAfter,
- int Offset,
- unsigned Base,
- bool BaseKill,
- unsigned Opcode,
- ARMCC::CondCodes Pred,
- unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
- void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
- unsigned Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
- void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
+ MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ void FormCandidates(const MemOpQueue &MemOps);
+ MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
- bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo *TII,
- bool &Advance,
- MachineBasicBlock::iterator &I);
- bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool &Advance,
- MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLoadStore(MachineInstr *MI);
+ bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+ bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -185,6 +189,14 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return Offset;
}
+static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
+ return MI.getOperand(1);
+}
+
+static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
+ return MI.getOperand(0);
+}
+
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
@@ -348,6 +360,10 @@ static bool isi32Store(unsigned Opc) {
return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
+static bool isLoadSingle(unsigned Opc) {
+ return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+}
+
static unsigned getImmScale(unsigned Opc) {
switch (Opc) {
default: llvm_unreachable("Unhandled opcode!");
@@ -365,12 +381,55 @@ static unsigned getImmScale(unsigned Opc) {
}
}
+static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base,
+ DebugLoc DL, unsigned Base,
unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
@@ -398,7 +457,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
// If storing the base register, it needs to be reset first.
- unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
+ unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
MO.setImm(Offset);
@@ -439,7 +498,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
return;
}
@@ -457,31 +516,65 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
// See PR21029.
if (MBBI != MBB.end()) --MBBI;
AddDefaultT1CC(
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
}
}
+/// Return the first register of class \p RegClass that is not in \p Regs.
+unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
+ if (!RegClassInfoValid) {
+ RegClassInfo.runOnMachineFunction(*MF);
+ RegClassInfoValid = true;
+ }
+
+ for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
+ if (!LiveRegs.contains(Reg))
+ return Reg;
+ return 0;
+}
+
+/// Compute live registers just before instruction \p Before (in normal schedule
+/// direction). Computes backwards so multiple queries in the same block must
+/// come in reverse order.
+void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before) {
+ // Initialize if we never queried in this block.
+ if (!LiveRegsValid) {
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOuts(&MBB, true);
+ LiveRegPos = MBB.end();
+ LiveRegsValid = true;
+ }
+ // Move backward just before the "Before" position.
+ while (LiveRegPos != Before) {
+ --LiveRegPos;
+ LiveRegs.stepBackward(*LiveRegPos);
+ }
+}
+
+static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
+ unsigned Reg) {
+ for (const std::pair<unsigned, bool> &R : Regs)
+ if (R.first == Reg)
+ return true;
+ return false;
+}
+
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
-bool
-ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill,
- unsigned Opcode, ARMCC::CondCodes Pred,
- unsigned PredReg, unsigned Scratch, DebugLoc dl,
- ArrayRef<std::pair<unsigned, bool> > Regs,
- ArrayRef<unsigned> ImpDefs) {
- // Only a single register to load / store. Don't bother.
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
unsigned NumRegs = Regs.size();
- if (NumRegs <= 1)
- return false;
+ assert(NumRegs > 1);
// For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
// Compute liveness information for that register to make the decision.
bool SafeToClobberCPSR = !isThumb1 ||
- (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
MachineBasicBlock::LQR_Dead);
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
@@ -489,17 +582,14 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback.
// It's also not possible to merge an STR of the base register in Thumb1.
- if (isThumb1)
- for (const std::pair<unsigned, bool> &R : Regs)
- if (Base == R.first) {
- assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
- if (Opcode == ARM::tLDRi) {
- Writeback = false;
- break;
- } else if (Opcode == ARM::tSTRi) {
- return false;
- }
- }
+ if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
+ if (Opcode == ARM::tLDRi) {
+ Writeback = false;
+ } else if (Opcode == ARM::tSTRi) {
+ return nullptr;
+ }
+ }
ARM_AM::AMSubMode Mode = ARM_AM::ia;
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
@@ -516,18 +606,18 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
} else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
// Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
- if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
if (NumRegs <= 2)
- return false;
+ return nullptr;
// On Thumb1, it's not worth materializing a new base register without
// clobbering the CPSR (i.e. not using ADDS/SUBS).
if (!SafeToClobberCPSR)
- return false;
+ return nullptr;
unsigned NewBase;
if (isi32Load(Opcode)) {
@@ -535,10 +625,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// use as the new base.
NewBase = Regs[NumRegs-1].first;
} else {
- // Use the scratch register to use as a new base.
- NewBase = Scratch;
+ // Find a free register that we can use as scratch register.
+ moveLiveRegsBefore(MBB, InsertBefore);
+ // The merged instruction does not exist yet but will use several Regs if
+ // it is a Store.
+ if (!isLoadSingle(Opcode))
+ for (const std::pair<unsigned, bool> &R : Regs)
+ LiveRegs.addReg(R.first);
+
+ NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
if (NewBase == 0)
- return false;
+ return nullptr;
}
int BaseOpc =
@@ -557,7 +654,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (!TL->isLegalAddImmediate(Offset))
// FIXME: Try add with register operand?
- return false; // Probably not worth it then.
+ return nullptr; // Probably not worth it then.
+
+ // We can only append a kill flag to the add/sub input if the value is not
+ // used in the register list of the stm as well.
+ bool KillOldBase = BaseKill &&
+ (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
if (isThumb1) {
// Thumb1: depending on immediate size, use either
@@ -572,43 +674,44 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
!STI->hasV6Ops()) {
// thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
if (Pred != ARMCC::AL)
- return false;
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
- .addReg(Base, getKillRegState(BaseKill));
+ return nullptr;
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase));
} else
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
- .addReg(Base, getKillRegState(BaseKill))
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase))
.addImm(Pred).addReg(PredReg);
- // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
+ // The following ADDS/SUBS becomes an update.
Base = NewBase;
- BaseKill = false;
+ KillOldBase = true;
}
if (BaseOpc == ARM::tADDrSPi) {
assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
.addImm(Pred).addReg(PredReg);
} else
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ AddDefaultT1CC(
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
.addImm(Pred).addReg(PredReg);
} else {
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
}
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
}
- bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
- Opcode == ARM::VLDRD);
+ bool isDef = isLoadSingle(Opcode);
// Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
// base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
- if (!Opcode) return false;
+ if (!Opcode)
+ return nullptr;
// Check if a Thumb1 LDM/STM merge is safe. This is the case if:
// - There is no writeback (LDM of base register),
@@ -619,7 +722,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// It's safe to return here since the code to materialize a new base register
// above is also conditional on SafeToClobberCPSR.
if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
- return false;
+ return nullptr;
MachineInstrBuilder MIB;
@@ -628,7 +731,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
@@ -637,381 +740,257 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// The base isn't dead after a merged instruction with writeback.
// Insert a sub instruction after the newly formed instruction to reset.
if (!BaseKill)
- UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+ UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
} else {
// No writeback, simply build the MachineInstr.
- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
MIB.addReg(Base, getKillRegState(BaseKill));
}
MIB.addImm(Pred).addReg(PredReg);
for (const std::pair<unsigned, bool> &R : Regs)
- MIB = MIB.addReg(R.first, getDefRegState(isDef)
- | getKillRegState(R.second));
+ MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
- // Add implicit defs for super-registers.
- for (unsigned ImpDef : ImpDefs)
- MIB.addReg(ImpDef, RegState::ImplicitDefine);
-
- return true;
+ return MIB.getInstr();
}
-/// Find all instructions using a given imp-def within a range.
-///
-/// We are trying to combine a range of instructions, one of which (located at
-/// position RangeBegin) implicitly defines a register. The final LDM/STM will
-/// be placed at RangeEnd, and so any uses of this definition between RangeStart
-/// and RangeEnd must be modified to use an undefined value.
-///
-/// The live range continues until we find a second definition or one of the
-/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
-/// we must consider all uses and decide which are relevant in a second pass.
-void ARMLoadStoreOpt::findUsesOfImpDef(
- SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
- unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
- std::map<unsigned, MachineOperand *> Uses;
- unsigned LastLivePos = RangeEnd;
-
- // First we find all uses of this register with Position between RangeBegin
- // and RangeEnd, any or all of these could be uses of a definition at
- // RangeBegin. We also record the latest position a definition at RangeBegin
- // would be considered live.
- for (unsigned i = 0; i < MemOps.size(); ++i) {
- MachineInstr &MI = *MemOps[i].MBBI;
- unsigned MIPosition = MemOps[i].Position;
- if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
- continue;
-
- // If this instruction defines the register, then any later use will be of
- // that definition rather than ours.
- if (MI.definesRegister(DefReg))
- LastLivePos = std::min(LastLivePos, MIPosition);
-
- MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
- if (!UseOp)
- continue;
-
- // If this instruction kills the register then (assuming liveness is
- // correct when we start) we don't need to think about anything after here.
- if (UseOp->isKill())
- LastLivePos = std::min(LastLivePos, MIPosition);
-
- Uses[MIPosition] = UseOp;
- }
-
- // Now we traverse the list of all uses, and append the ones that actually use
- // our definition to the requested list.
- for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
- E = Uses.end();
- I != E; ++I) {
- // List is sorted by position so once we've found one out of range there
- // will be no more to consider.
- if (I->first > LastLivePos)
- break;
- UsesOfImpDefs.push_back(I->second);
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
+ bool IsLoad = isi32Load(Opcode);
+ assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
+ unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
+
+ assert(Regs.size() == 2);
+ MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
+ TII->get(LoadStoreOpcode));
+ if (IsLoad) {
+ MIB.addReg(Regs[0].first, RegState::Define)
+ .addReg(Regs[1].first, RegState::Define);
+ } else {
+ MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(Regs[1].first, getKillRegState(Regs[1].second));
}
+ MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ return MIB.getInstr();
}
/// Call MergeOps and update MemOps and merges accordingly on success.
-void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &memOps,
- unsigned memOpsBegin, unsigned memOpsEnd,
- unsigned insertAfter, int Offset,
- unsigned Base, bool BaseKill,
- unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
- // First calculate which of the registers should be killed by the merged
- // instruction.
- const unsigned insertPos = memOps[insertAfter].Position;
- SmallSet<unsigned, 4> KilledRegs;
- DenseMap<unsigned, unsigned> Killer;
- for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
- if (i == memOpsBegin) {
- i = memOpsEnd;
- if (i == e)
- break;
- }
- if (memOps[i].Position < insertPos && memOps[i].isKill) {
- unsigned Reg = memOps[i].Reg;
+MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
+ const MachineInstr *First = Cand.Instrs.front();
+ unsigned Opcode = First->getOpcode();
+ bool IsLoad = isLoadSingle(Opcode);
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 4> ImpDefs;
+ DenseSet<unsigned> KilledRegs;
+ // Determine list of registers and list of implicit super-register defs.
+ for (const MachineInstr *MI : Cand.Instrs) {
+ const MachineOperand &MO = getLoadStoreRegOp(*MI);
+ unsigned Reg = MO.getReg();
+ bool IsKill = MO.isKill();
+ if (IsKill)
KilledRegs.insert(Reg);
- Killer[Reg] = i;
+ Regs.push_back(std::make_pair(Reg, IsKill));
+
+ if (IsLoad) {
+      // Collect any implicit defs of super-registers. After merging we can't
+      // be sure anymore that we properly preserved these live ranges, so we
+      // must remove these implicit operands.
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ assert(MO.isImplicit());
+ unsigned DefReg = MO.getReg();
+
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
+ continue;
+ // We can ignore cases where the super-reg is read and written.
+ if (MI->readsRegister(DefReg))
+ continue;
+ ImpDefs.push_back(DefReg);
+ }
}
}
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
- if (TransferOp.isUse() && TransferOp.getReg() == Base)
- BaseKill = false;
+ // Attempt the merge.
+ typedef MachineBasicBlock::iterator iterator;
+ MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
+ iterator InsertBefore = std::next(iterator(LatestMI));
+ MachineBasicBlock &MBB = *LatestMI->getParent();
+ unsigned Offset = getMemoryOpOffset(First);
+ unsigned Base = getLoadStoreBaseOp(*First).getReg();
+ bool BaseKill = LatestMI->killsRegister(Base);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
+ DebugLoc DL = First->getDebugLoc();
+ MachineInstr *Merged = nullptr;
+ if (Cand.CanMergeToLSDouble)
+ Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged && Cand.CanMergeToLSMulti)
+ Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged)
+ return nullptr;
+
+  // Determine the earliest instruction that will get removed. We then keep an
+  // iterator just above it so that the following erases don't invalidate it.
+ iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
+ bool EarliestAtBegin = false;
+ if (EarliestI == MBB.begin()) {
+ EarliestAtBegin = true;
+ } else {
+ EarliestI = std::prev(EarliestI);
}
- SmallVector<std::pair<unsigned, bool>, 8> Regs;
- SmallVector<unsigned, 8> ImpDefs;
- SmallVector<MachineOperand *, 8> UsesOfImpDefs;
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- unsigned Reg = memOps[i].Reg;
- // If we are inserting the merged operation after an operation that
- // uses the same register, make sure to transfer any kill flag.
- bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
- Regs.push_back(std::make_pair(Reg, isKill));
-
- // Collect any implicit defs of super-registers. They must be preserved.
- for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
- continue;
- unsigned DefReg = MO.getReg();
- if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
- ImpDefs.push_back(DefReg);
-
- // There may be other uses of the definition between this instruction and
- // the eventual LDM/STM position. These should be marked undef if the
- // merge takes place.
- findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
- insertPos);
+ // Remove instructions which have been merged.
+ for (MachineInstr *MI : Cand.Instrs)
+ MBB.erase(MI);
+
+ // Determine range between the earliest removed instruction and the new one.
+ if (EarliestAtBegin)
+ EarliestI = MBB.begin();
+ else
+ EarliestI = std::next(EarliestI);
+ auto FixupRange = make_range(EarliestI, iterator(Merged));
+
+ if (isLoadSingle(Opcode)) {
+    // If the previous loads defined a super-reg, then we have to mark earlier
+    // operands undef and replicate the super-reg def on the merged instruction.
+ for (MachineInstr &MI : FixupRange) {
+ for (unsigned &ImpDefReg : ImpDefs) {
+ for (MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || MO.getReg() != ImpDefReg)
+ continue;
+ if (MO.readsReg())
+ MO.setIsUndef();
+ else if (MO.isDef())
+ ImpDefReg = 0;
+ }
+ }
}
- }
- // Try to do the merge.
- MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
- ++Loc;
- if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
- Pred, PredReg, Scratch, dl, Regs, ImpDefs))
- return;
-
- // Merge succeeded, update records.
- Merges.push_back(std::prev(Loc));
-
- // In gathering loads together, we may have moved the imp-def of a register
- // past one of its uses. This is OK, since we know better than the rest of
- // LLVM what's OK with ARM loads and stores; but we still have to adjust the
- // affected uses.
- for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
- E = UsesOfImpDefs.end();
- I != E; ++I)
- (*I)->setIsUndef();
-
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- // Remove kill flags from any memops that come before insertPos.
- if (Regs[i-memOpsBegin].second) {
- unsigned Reg = Regs[i-memOpsBegin].first;
- if (KilledRegs.count(Reg)) {
- unsigned j = Killer[Reg];
- int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
- assert(Idx >= 0 && "Cannot find killing operand");
- memOps[j].MBBI->getOperand(Idx).setIsKill(false);
- memOps[j].isKill = false;
+ MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
+ for (unsigned ImpDef : ImpDefs)
+ MIB.addReg(ImpDef, RegState::ImplicitDefine);
+ } else {
+    // Remove kill flags: we may now be storing the values at a later point.
+ assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
+ for (MachineInstr &MI : FixupRange) {
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.isKill())
+ continue;
+ if (KilledRegs.count(MO.getReg()))
+ MO.setIsKill(false);
}
- memOps[i].isKill = true;
}
- MBB.erase(memOps[i].MBBI);
- // Update this memop to refer to the merged instruction.
- // We may need to move kill flags again.
- memOps[i].Merged = true;
- memOps[i].MBBI = Merges.back();
- memOps[i].Position = insertPos;
+ assert(ImpDefs.empty());
}
- // Update memOps offsets, since they may have been modified by MergeOps.
- for (auto &MemOp : memOps) {
- MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
- }
+ return Merged;
}
-/// Merge a number of load / store instructions into one or more load / store
-/// multiple instructions.
-void
-ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
- unsigned Base, unsigned Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
- bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
- int Offset = MemOps[SIndex].Offset;
- int SOffset = Offset;
- unsigned insertAfter = SIndex;
- MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
- DebugLoc dl = Loc->getDebugLoc();
- const MachineOperand &PMO = Loc->getOperand(0);
- unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
- unsigned Count = 1;
- unsigned Limit = ~0U;
- bool BaseKill = false;
- // vldm / vstm limit are 32 for S variants, 16 for D variants.
+static bool isValidLSDoubleOffset(int Offset) {
+ unsigned Value = abs(Offset);
+  // t2LDRDi8/t2STRDi8 supports an 8-bit immediate which is internally
+  // multiplied by 4.
+ return (Value % 4) == 0 && Value < 1024;
+}
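A minimal standalone sketch of this offset check, with a hypothetical name and no LLVM dependencies: the encoded immediate is 8 bits and scaled by 4, so accepted byte offsets are multiples of 4 with magnitude at most 1020.

#include <cassert>
#include <cstdlib>

// Hypothetical model of the t2LDRDi8/t2STRDi8 offset-range check above.
static bool isValidLSDoubleOffsetModel(int Offset) {
  unsigned Value = std::abs(Offset);
  return (Value % 4) == 0 && Value < 1024;
}

int main() {
  assert(isValidLSDoubleOffsetModel(0));
  assert(isValidLSDoubleOffsetModel(-1020)); // largest encodable magnitude
  assert(!isValidLSDoubleOffsetModel(1024)); // out of range
  assert(!isValidLSDoubleOffsetModel(6));    // not a multiple of 4
  return 0;
}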
- switch (Opcode) {
- default: break;
- case ARM::VSTRS:
- Limit = 32;
- break;
- case ARM::VSTRD:
- Limit = 16;
- break;
- case ARM::VLDRD:
- Limit = 16;
- break;
- case ARM::VLDRS:
- Limit = 32;
- break;
- }
+/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
+void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
+ const MachineInstr *FirstMI = MemOps[0].MI;
+ unsigned Opcode = FirstMI->getOpcode();
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ unsigned Size = getLSMultipleTransferSize(FirstMI);
+
+ unsigned SIndex = 0;
+ unsigned EIndex = MemOps.size();
+ do {
+ // Look at the first instruction.
+ const MachineInstr *MI = MemOps[SIndex].MI;
+ int Offset = MemOps[SIndex].Offset;
+ const MachineOperand &PMO = getLoadStoreRegOp(*MI);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
+ unsigned Latest = SIndex;
+ unsigned Earliest = SIndex;
+ unsigned Count = 1;
+ bool CanMergeToLSDouble =
+ STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ if (STI->isCortexM3() && isi32Load(Opcode) &&
+ PReg == getLoadStoreBaseOp(*MI).getReg())
+ CanMergeToLSDouble = false;
+
+ bool CanMergeToLSMulti = true;
+    // On Swift, avoid vldm/vstm starting with an odd register number, as that
+    // needs more uops than single vldrs.
+ if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+ CanMergeToLSMulti = false;
+
+ // Merge following instructions where possible.
+ for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
+ int NewOffset = MemOps[I].Offset;
+ if (NewOffset != Offset + (int)Size)
+ break;
+ const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
+
+ // See if the current load/store may be part of a multi load/store.
+ bool PartOfLSMulti = CanMergeToLSMulti;
+ if (PartOfLSMulti) {
+ // Cannot load from SP
+ if (Reg == ARM::SP)
+ PartOfLSMulti = false;
+ // Register numbers must be in ascending order.
+ else if (RegNum <= PRegNum)
+ PartOfLSMulti = false;
+ // For VFP / NEON load/store multiples, the registers must be
+ // consecutive and within the limit on the number of registers per
+ // instruction.
+ else if (!isNotVFP && RegNum != PRegNum+1)
+ PartOfLSMulti = false;
+ }
+ // See if the current load/store may be part of a double load/store.
+ bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
- for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
- int NewOffset = MemOps[i].Offset;
- const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
- unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
- // Register numbers must be in ascending order. For VFP / NEON load and
- // store multiples, the registers must also be consecutive and within the
- // limit on the number of registers per instruction.
- if (Reg != ARM::SP &&
- NewOffset == Offset + (int)Size &&
- ((isNotVFP && RegNum > PRegNum) ||
- ((Count < Limit) && RegNum == PRegNum+1)) &&
- // On Swift we don't want vldm/vstm to start with a odd register num
- // because Q register unaligned vldm/vstm need more uops.
- (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
+ if (!PartOfLSMulti && !PartOfLSDouble)
+ break;
+ CanMergeToLSMulti &= PartOfLSMulti;
+ CanMergeToLSDouble &= PartOfLSDouble;
+      // Track the MemOps with the latest and earliest positions (positions
+      // are counted in reverse).
+ unsigned Position = MemOps[I].Position;
+ if (Position < MemOps[Latest].Position)
+ Latest = I;
+ else if (Position > MemOps[Earliest].Position)
+ Earliest = I;
+ // Prepare for next MemOp.
Offset += Size;
PRegNum = RegNum;
- ++Count;
- } else {
- // Can't merge this in. Try merge the earlier ones first.
- // We need to compute BaseKill here because the MemOps may have been
- // reordered.
- BaseKill = Loc->killsRegister(Base);
-
- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
- BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
- MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
- MemOps, Merges);
- return;
}
- if (MemOps[i].Position > MemOps[insertAfter].Position) {
- insertAfter = i;
- Loc = MemOps[i].MBBI;
- }
- }
-
- BaseKill = Loc->killsRegister(Base);
- MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
- Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
-}
-
-static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tSUBi8:
- case ARM::t2SUBri:
- case ARM::SUBri:
- CheckCPSRDef = true;
- break;
- case ARM::tSUBspi:
- break;
- }
-
- // Make sure the offset fits in 8 bits.
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
- MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
-static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tADDi8:
- case ARM::t2ADDri:
- case ARM::ADDri:
- CheckCPSRDef = true;
- break;
- case ARM::tADDspi:
- break;
- }
-
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- // Make sure the offset fits in 8 bits.
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
- MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
-static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- default: return 0;
- case ARM::LDRi12:
- case ARM::STRi12:
- case ARM::tLDRi:
- case ARM::tSTRi:
- case ARM::tLDRspi:
- case ARM::tSTRspi:
- case ARM::t2LDRi8:
- case ARM::t2LDRi12:
- case ARM::t2STRi8:
- case ARM::t2STRi12:
- case ARM::VLDRS:
- case ARM::VSTRS:
- return 4;
- case ARM::VLDRD:
- case ARM::VSTRD:
- return 8;
- case ARM::LDMIA:
- case ARM::LDMDA:
- case ARM::LDMDB:
- case ARM::LDMIB:
- case ARM::STMIA:
- case ARM::STMDA:
- case ARM::STMDB:
- case ARM::STMIB:
- case ARM::tLDMIA:
- case ARM::tLDMIA_UPD:
- case ARM::tSTMIA_UPD:
- case ARM::t2LDMIA:
- case ARM::t2LDMDB:
- case ARM::t2STMIA:
- case ARM::t2STMDB:
- case ARM::VLDMSIA:
- case ARM::VSTMSIA:
- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
- case ARM::VLDMDIA:
- case ARM::VSTMDIA:
- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
- }
+ // Form a candidate from the Ops collected so far.
+ MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
+ for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
+ Candidate->Instrs.push_back(MemOps[C].MI);
+ Candidate->LatestMIIdx = Latest - SIndex;
+ Candidate->EarliestMIIdx = Earliest - SIndex;
+ Candidate->InsertPos = MemOps[Latest].Position;
+ if (Count == 1)
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+ Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
+ Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
+ Candidates.push_back(Candidate);
+ // Continue after the chain.
+ SIndex += Count;
+ } while (SIndex < EIndex);
}
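A standalone sketch covering only the offset rule of FormCandidates, with hypothetical names: a run of memory ops stays in one candidate only while each offset equals the previous offset plus the access size, and a new candidate starts once the run breaks.

#include <cassert>
#include <vector>

// Hypothetical model of the offset-based chaining used in FormCandidates.
static std::vector<int> candidateLengths(const std::vector<int> &Offsets,
                                         int Size) {
  std::vector<int> Lengths;
  for (size_t S = 0; S < Offsets.size();) {
    size_t Count = 1;
    while (S + Count < Offsets.size() &&
           Offsets[S + Count] == Offsets[S + Count - 1] + Size)
      ++Count;
    Lengths.push_back(static_cast<int>(Count));
    S += Count; // Continue after the chain, as above.
  }
  return Lengths;
}

int main() {
  // ldr r1, [r0]; ldr r2, [r0, #4]; ldr r3, [r0, #8]; ldr r4, [r0, #16]
  assert(candidateLengths({0, 4, 8, 16}, 4) == (std::vector<int>{3, 1}));
  return 0;
}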
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
@@ -1081,6 +1060,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
}
+/// Check if the given instruction increments or decrements a register and
+/// return the amount it is incremented/decremented. Returns 0 if the
+/// instruction is not a matching increment/decrement, or if the CPSR flags it
+/// generates could be read by a later instruction.
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ bool CheckCPSRDef;
+ int Scale;
+ switch (MI.getOpcode()) {
+ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
+ case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
+ case ARM::t2SUBri:
+ case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
+ case ARM::t2ADDri:
+ case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
+ case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
+ case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+ default: return 0;
+ }
+
+ unsigned MIPredReg;
+ if (MI.getOperand(0).getReg() != Reg ||
+ MI.getOperand(1).getReg() != Reg ||
+ getInstrPredicate(&MI, MIPredReg) != Pred ||
+ MIPredReg != PredReg)
+ return 0;
+
+ if (CheckCPSRDef && definesCPSR(&MI))
+ return 0;
+ return MI.getOperand(2).getImm() * Scale;
+}
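For reference, a standalone sketch of the scale factors chosen above, with hypothetical names and no LLVM types: Thumb 8-bit and SP-relative add/sub immediates are scaled by 4, ARM/Thumb2 immediates by 1, and subtractions are reported as negative byte offsets.

#include <cassert>

enum class Op { tADDi8, tSUBi8, ADDri, SUBri, tADDspi, tSUBspi };

// Hypothetical model of the Scale values in isIncrementOrDecrement.
static int scaledOffset(Op Opc, int Imm) {
  switch (Opc) {
  case Op::tADDi8: case Op::tADDspi: return 4 * Imm;
  case Op::tSUBi8: case Op::tSUBspi: return -4 * Imm;
  case Op::ADDri:  return Imm;
  case Op::SUBri:  return -Imm;
  }
  return 0;
}

int main() {
  assert(scaledOffset(Op::tSUBspi, 2) == -8); // sub sp, #8
  assert(scaledOffset(Op::ADDri, 12) == 12);  // add r0, r0, #12
  return 0;
}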
+
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (MBBI == BeginMBBI)
+ return EndMBBI;
+
+ // Skip debug values.
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+ while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+ --PrevMBBI;
+
+ Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for an increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+ // Skip debug values.
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (NextMBBI == EndMBBI)
+ return EndMBBI;
+
+ Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
@@ -1093,21 +1141,17 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
-bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Thumb1 is already using updating loads/stores.
if (isThumb1) return false;
- MachineInstr *MI = MBBI;
- unsigned Base = MI->getOperand(0).getReg();
- bool BaseKill = MI->getOperand(0).isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
+ const MachineOperand &BaseOP = MI->getOperand(0);
+ unsigned Base = BaseOP.getReg();
+ bool BaseKill = BaseOP.isKill();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
// Can't use an updating ld/st if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
@@ -1115,55 +1159,27 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (MI->getOperand(i).getReg() == Base)
return false;
- bool DoMerge = false;
+ int Bytes = getLSMultipleTransferSize(MI);
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
-
- // Try merging with the previous instruction.
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- } else if (Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::da;
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(PrevMBBI);
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
- MBB.erase(NextMBBI);
- }
+ if (Mode == ARM_AM::ia && Offset == -Bytes) {
+ Mode = ARM_AM::db;
+ } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
+ Mode = ARM_AM::da;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
+ return false;
}
-
- if (!DoMerge)
- return false;
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
@@ -1231,21 +1247,15 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
-bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo *TII,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// Thumb1 doesn't have updating LDR/STR.
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
- MachineInstr *MI = MBBI;
- unsigned Base = MI->getOperand(1).getReg();
- bool BaseKill = MI->getOperand(1).isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned Base = getLoadStoreBaseOp(*MI).getReg();
+ bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
unsigned Opcode = MI->getOpcode();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
@@ -1255,7 +1265,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
@@ -1263,64 +1272,38 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
- bool DoMerge = false;
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- unsigned NewOpc = 0;
- // AM2 - 12 bits, thumb2 - 8 bits.
- unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
-
- // Try merging with the previous instruction.
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (!isAM5 &&
- isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(PrevMBBI);
- }
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if (!isAM5 &&
- isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
- MBB.erase(NextMBBI);
- }
+ int Bytes = getLSMultipleTransferSize(MI);
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ unsigned NewOpc;
+ if (!isAM5 && Offset == Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (Offset == -Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (!isAM5 && Offset == -Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else
+ return false;
}
+ MBB.erase(MergeInstr);
- if (!DoMerge)
- return false;
+ ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+ bool isLd = isLoadSingle(Opcode);
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
MachineOperand &MO = MI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(isLd ? BaseKill : false))
.addImm(Pred).addReg(PredReg)
@@ -1330,20 +1313,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
}
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
}
@@ -1353,15 +1334,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
}
@@ -1371,6 +1351,66 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return true;
}
+bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
+ "Must have t2STRDi8 or t2LDRDi8");
+ if (MI.getOperand(3).getImm() != 0)
+ return false;
+
+ // Behaviour for writeback is undefined if base register is the same as one
+ // of the others.
+ const MachineOperand &BaseOp = MI.getOperand(2);
+ unsigned Base = BaseOp.getReg();
+ const MachineOperand &Reg0Op = MI.getOperand(0);
+ const MachineOperand &Reg1Op = MI.getOperand(1);
+ if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
+ return false;
+
+ unsigned PredReg;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ MachineBasicBlock::iterator MBBI(MI);
+ MachineBasicBlock &MBB = *MI.getParent();
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
+ PredReg, Offset);
+ unsigned NewOpc;
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
+ } else
+ return false;
+ }
+ MBB.erase(MergeInstr);
+
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
+ MIB.addOperand(Reg0Op).addOperand(Reg1Op)
+ .addReg(BaseOp.getReg(), RegState::Define);
+ } else {
+ assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
+ MIB.addReg(BaseOp.getReg(), RegState::Define)
+ .addOperand(Reg0Op).addOperand(Reg1Op);
+ }
+ MIB.addReg(BaseOp.getReg(), RegState::Kill)
+ .addImm(Offset).addImm(Pred).addReg(PredReg);
+ assert(TII->get(Opcode).getNumOperands() == 6 &&
+ TII->get(NewOpc).getNumOperands() == 7 &&
+ "Unexpected number of operands in Opcode specification.");
+
+ // Transfer implicit operands.
+ for (const MachineOperand &MO : MI.implicit_operands())
+ MIB.addOperand(MO);
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
+}
+
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
@@ -1426,26 +1466,10 @@ static bool isMemoryOp(const MachineInstr *MI) {
return false;
}
-/// Advance register scavenger to just before the earliest memory op that is
-/// being merged.
-void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
- MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
- unsigned Position = MemOps[0].Position;
- for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
- if (MemOps[i].Position < Position) {
- Position = MemOps[i].Position;
- Loc = MemOps[i].MBBI;
- }
- }
-
- if (Loc != MBB.begin())
- RS->forward(std::prev(Loc));
-}
-
static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
int Offset, bool isDef,
- DebugLoc dl, unsigned NewOpc,
+ DebugLoc DL, unsigned NewOpc,
unsigned Reg, bool RegDeadKill, bool RegUndef,
unsigned BaseReg, bool BaseKill, bool BaseUndef,
bool OffKill, bool OffUndef,
@@ -1491,7 +1515,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if (!Errata602117 && !NonConsecutiveRegs)
return false;
- MachineBasicBlock::iterator NewBBI = MBBI;
bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
@@ -1531,7 +1554,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
- NewBBI = std::prev(MBBI);
} else {
// Split into two instructions.
unsigned NewOpc = (isLd)
@@ -1553,7 +1575,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1573,7 +1594,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1585,191 +1605,160 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
++NumSTRD2STR;
}
- MBB.erase(MI);
- MBBI = NewBBI;
+ MBBI = MBB.erase(MBBI);
return true;
}
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
- unsigned NumMerges = 0;
- unsigned NumMemOps = 0;
MemOpQueue MemOps;
unsigned CurrBase = 0;
unsigned CurrOpc = ~0u;
- unsigned CurrSize = 0;
ARMCC::CondCodes CurrPred = ARMCC::AL;
- unsigned CurrPredReg = 0;
unsigned Position = 0;
- SmallVector<MachineBasicBlock::iterator,4> Merges;
-
- RS->enterBasicBlock(&MBB);
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
+ assert(Candidates.size() == 0);
+ assert(MergeBaseCandidates.size() == 0);
+ LiveRegsValid = false;
+
+ for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
+ I = MBBI) {
+ // The instruction in front of the iterator is the one we look at.
+ MBBI = std::prev(I);
if (FixInvalidRegPairOp(MBB, MBBI))
continue;
+ ++Position;
- bool Advance = false;
- bool TryMerge = false;
-
- bool isMemOp = isMemoryOp(MBBI);
- if (isMemOp) {
+ if (isMemoryOp(MBBI)) {
unsigned Opcode = MBBI->getOpcode();
- unsigned Size = getLSMultipleTransferSize(MBBI);
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- bool isKill = MO.isDef() ? false : MO.isKill();
- unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
- // Watch out for:
- // r4 := ldr [r5]
- // r5 := ldr [r5, #4]
- // r6 := ldr [r5, #8]
- //
- // The second ldr has effectively broken the chain even though it
- // looks like the later ldr(s) use the same base register. Try to
- // merge the ldr's so far, including this one. But don't try to
- // combine the following ldr(s).
- bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
-
- // Watch out for:
- // r4 := ldr [r0, #8]
- // r4 := ldr [r0, #4]
- //
- // The optimization may reorder the second ldr in front of the first
- // ldr, which violates write after write(WAW) dependence. The same as
- // str. Try to merge inst(s) already in MemOps.
- bool Overlap = false;
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
- if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
- Overlap = true;
- break;
- }
- }
-
- if (CurrBase == 0 && !Clobber) {
+ if (CurrBase == 0) {
// Start of a new chain.
CurrBase = Base;
CurrOpc = Opcode;
- CurrSize = Size;
CurrPred = Pred;
- CurrPredReg = PredReg;
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
- ++NumMemOps;
- Advance = true;
- } else if (!Overlap) {
- if (Clobber) {
- TryMerge = true;
- Advance = true;
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
+ // Note: No need to match PredReg in the next if.
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ // or
+ // r0 := ldr [r0]
+        // If a load overwrites the base register or a register loaded by
+        // another load in our chain, we cannot take this instruction.
+ bool Overlap = false;
+ if (isLoadSingle(Opcode)) {
+ Overlap = (Base == Reg);
+ if (!Overlap) {
+ for (const MemOpQueueEntry &E : MemOps) {
+ if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+ }
}
- if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
- // No need to match PredReg.
- // Continue adding to the queue.
+ if (!Overlap) {
+ // Check offset and sort memory operation into the current chain.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
- Advance = true;
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
} else {
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
- I != E; ++I) {
- if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
- Advance = true;
+ MemOpQueue::iterator MI, ME;
+ for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
+ if (Offset < MI->Offset) {
+ // Found a place to insert.
break;
- } else if (Offset == I->Offset) {
- // Collision! This can't be merged!
+ }
+ if (Offset == MI->Offset) {
+ // Collision, abort.
+ MI = ME;
break;
}
}
+ if (MI != MemOps.end()) {
+ MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
}
}
}
- }
- if (MBBI->isDebugValue()) {
- ++MBBI;
- if (MBBI == E)
- // Reach the end of the block, try merging the memory instructions.
- TryMerge = true;
- } else if (Advance) {
- ++Position;
- ++MBBI;
- if (MBBI == E)
- // Reach the end of the block, try merging the memory instructions.
- TryMerge = true;
- } else {
- TryMerge = true;
+      // Don't advance the iterator; the op will start a new chain next.
+ MBBI = I;
+ --Position;
+ // Fallthrough to look into existing chain.
+ } else if (MBBI->isDebugValue()) {
+ continue;
+ } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+ MBBI->getOpcode() == ARM::t2STRDi8) {
+      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
+      // remember them because we may still be able to merge add/sub into them.
+ MergeBaseCandidates.push_back(MBBI);
}
- if (TryMerge) {
- if (NumMemOps > 1) {
- // Try to find a free register to use as a new base in case it's needed.
- // First advance to the instruction just before the start of the chain.
- AdvanceRS(MBB, MemOps);
-
- // Find a scratch register.
- unsigned Scratch =
- RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
-
- // Process the load / store instructions.
- RS->forward(std::prev(MBBI));
-
- // Merge ops.
- Merges.clear();
- MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
- CurrPred, CurrPredReg, Scratch, MemOps, Merges);
-
- // Try folding preceding/trailing base inc/dec into the generated
- // LDM/STM ops.
- for (unsigned i = 0, e = Merges.size(); i < e; ++i)
- if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
- ++NumMerges;
- NumMerges += Merges.size();
-
- // Try folding preceding/trailing base inc/dec into those load/store
- // that were not merged to form LDM/STM ops.
- for (unsigned i = 0; i != NumMemOps; ++i)
- if (!MemOps[i].Merged)
- if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
- ++NumMerges;
-
- // RS may be pointing to an instruction that's deleted.
- RS->skipTo(std::prev(MBBI));
- } else if (NumMemOps == 1) {
- // Try folding preceding/trailing base inc/dec into the single
- // load/store.
- if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
- ++NumMerges;
- RS->forward(std::prev(MBBI));
- }
- }
+    // If we are here then the chain is broken; extract candidates for a merge.
+ if (MemOps.size() > 0) {
+ FormCandidates(MemOps);
+ // Reset for the next chain.
CurrBase = 0;
CurrOpc = ~0u;
- CurrSize = 0;
CurrPred = ARMCC::AL;
- CurrPredReg = 0;
- if (NumMemOps) {
- MemOps.clear();
- NumMemOps = 0;
- }
+ MemOps.clear();
+ }
+ }
+ if (MemOps.size() > 0)
+ FormCandidates(MemOps);
- // If iterator hasn't been advanced and this is not a memory op, skip it.
- // It can't start a new chain anyway.
- if (!Advance && !isMemOp && MBBI != E) {
- ++Position;
- ++MBBI;
+  // Sort candidates so they get processed from the end to the beginning of the
+  // basic block later; this is necessary for liveness calculation.
+ auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
+ return M0->InsertPos < M1->InsertPos;
+ };
+ std::sort(Candidates.begin(), Candidates.end(), LessThan);
+
+ // Go through list of candidates and merge.
+ bool Changed = false;
+ for (const MergeCandidate *Candidate : Candidates) {
+ if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
+ MachineInstr *Merged = MergeOpsUpdate(*Candidate);
+ // Merge preceding/trailing base inc/dec into the merged op.
+ if (Merged) {
+ Changed = true;
+ unsigned Opcode = Merged->getOpcode();
+ if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+ MergeBaseUpdateLSDouble(*Merged);
+ else
+ MergeBaseUpdateLSMultiple(Merged);
+ } else {
+ for (MachineInstr *MI : Candidate->Instrs) {
+ if (MergeBaseUpdateLoadStore(MI))
+ Changed = true;
+ }
}
+ } else {
+ assert(Candidate->Instrs.size() == 1);
+ if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
+ Changed = true;
}
}
- return NumMerges > 0;
+ Candidates.clear();
+ // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+ for (MachineInstr *MI : MergeBaseCandidates)
+ MergeBaseUpdateLSDouble(*MI);
+ MergeBaseCandidates.clear();
+
+ return Changed;
}
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
@@ -1814,12 +1803,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
- RS = new RegScavenger();
+ MRI = &Fn.getRegInfo();
+ RegClassInfoValid = false;
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
@@ -1832,7 +1823,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Modified |= MergeReturnIntoLDM(MBB);
}
- delete RS;
+ Allocator.DestroyAll();
return Modified;
}
@@ -2219,7 +2210,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
continue;
int Opc = MI->getOpcode();
- bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ bool isLd = isLoadSingle(Opc);
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a59cf9851108..6cafbbb9f8eb 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -18,12 +18,6 @@ using namespace llvm;
#define DEBUG_TYPE "arm-selectiondag-info"
-ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
-}
-
// Emit, if possible, a specialized version of the given Libcall. Typically this
// means selecting the appropriately aligned version, but we also convert memset
// of 0 into memclr.
@@ -83,7 +77,7 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
Entry.Node = Dst;
Args.push_back(Entry);
if (AEABILibcall == AEABI_MEMCLR) {
@@ -121,12 +115,14 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
{ "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
};
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
- TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+ TLI->getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 1db190f41e1a..289879ee1d7e 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -37,8 +37,6 @@ namespace ARM_AM {
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit ARMSelectionDAGInfo(const DataLayout &DL);
- ~ARMSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 55808dfb9efe..002c3e9b6291 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -112,7 +112,6 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
TargetTriple(TT), Options(TM.Options), TM(TM),
- TSInfo(*TM.getDataLayout()),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
@@ -172,6 +171,7 @@ void ARMSubtarget::initializeEnvironment() {
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
+ GenLongCalls = false;
UnsafeFPMath = false;
}
@@ -286,7 +286,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
if (RelocM == Reloc::Static)
return false;
- bool isDecl = GV->isDeclarationForLinker();
+ bool isDef = GV->isStrongDefinitionForLinker();
if (!isTargetMachO()) {
// Extra load is needed for all externally visible symbols.
@@ -294,34 +294,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
return false;
return true;
} else {
- if (RelocM == Reloc::PIC_) {
- // If this is a strong reference to a definition, it is definitely not
- // through a stub.
- if (!isDecl && !GV->isWeakForLinker())
- return false;
-
- // Unless we have a symbol with hidden visibility, we have to go through a
- // normal $non_lazy_ptr stub because this symbol might be resolved late.
- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
- return true;
+ // If this is a strong reference to a definition, it is definitely not
+ // through a stub.
+ if (isDef)
+ return false;
+
+ // Unless we have a symbol with hidden visibility, we have to go through a
+ // normal $non_lazy_ptr stub because this symbol might be resolved late.
+ if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
+ return true;
+ if (RelocM == Reloc::PIC_) {
// If symbol visibility is hidden, we have a stub for common symbol
// references and external declarations.
- if (isDecl || GV->hasCommonLinkage())
+ if (GV->isDeclarationForLinker() || GV->hasCommonLinkage())
// Hidden $non_lazy_ptr reference.
return true;
-
- return false;
- } else {
- // If this is a strong reference to a definition, it is definitely not
- // through a stub.
- if (!isDecl && !GV->isWeakForLinker())
- return false;
-
- // Unless we have a symbol with hidden visibility, we have to go through a
- // normal $non_lazy_ptr stub because this symbol might be resolved late.
- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
- return true;
}
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 9909a6a6d198..dd101df9b63d 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -206,6 +206,9 @@ protected:
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
+ /// Generate calls via indirect call instructions.
+ bool GenLongCalls;
+
/// Target machine allowed unsafe FP math (such as use of NEON fp)
bool UnsafeFPMath;
@@ -342,6 +345,7 @@ public:
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
bool useNaClTrap() const { return UseNaClTrap; }
+ bool genLongCalls() const { return GenLongCalls; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6e81bd2d349d..93495d66ae70 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -80,8 +80,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
// FIXME: This is duplicated code from the front end and should be unified.
if (TT.isOSBinFormatMachO()) {
if (TT.getEnvironment() == llvm::Triple::EABI ||
- (TT.getOS() == llvm::Triple::UnknownOS &&
- TT.getObjectFormat() == llvm::Triple::MachO) ||
+ (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
CPU.startswith("cortex-m")) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
} else {
@@ -104,8 +103,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
break;
default:
- if (TT.getOS() == llvm::Triple::NetBSD)
- TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
+ if (TT.isOSNetBSD())
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
else
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
break;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f4901fc24e44..2f194cf7ae06 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -61,14 +61,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
if (Idx != -1)
return LT.first * NEONFltDblTbl[Idx].Cost;
}
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
@@ -282,8 +282,8 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
};
- EVT SelCondTy = TLI->getValueType(CondTy);
- EVT SelValTy = TLI->getValueType(ValTy);
+ EVT SelCondTy = TLI->getValueType(DL, CondTy);
+ EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
SelCondTy.getSimpleVT(),
@@ -292,7 +292,7 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return NEONVectorSelectTbl[Idx].Cost;
}
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
@@ -353,7 +353,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
@@ -379,7 +379,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
int Idx =
CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
@@ -395,7 +395,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
TTI::OperandValueProperties Opd2PropInfo) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
@@ -468,7 +468,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isDoubleTy()) {
@@ -488,12 +488,12 @@ unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
- bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+ bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index f2e5db655ccf..84f256f73722 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -42,7 +42,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
public:
explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
ARMTTIImpl(const ARMTTIImpl &Arg)
@@ -50,18 +51,6 @@ public:
ARMTTIImpl(ARMTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- ARMTTIImpl &operator=(const ARMTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- ARMTTIImpl &operator=(ARMTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c2db74619871..f8f0eb2d4baa 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -189,9 +189,9 @@ class ARMAsmParser : public MCTargetAsmParser {
return getParser().Error(L, Msg, Ranges);
}
- bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands,
+ bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands,
unsigned ListNo, bool IsARPop = false);
- bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands,
+ bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands,
unsigned ListNo);
int tryParseRegister();
@@ -242,6 +242,8 @@ class ARMAsmParser : public MCTargetAsmParser {
bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
+ void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting,
+ OperandVector &Operands);
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
return STI.getFeatureBits()[ARM::ModeThumb];
@@ -5465,6 +5467,92 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
CanAcceptPredicationCode = true;
}
+// \brief Some Thumb instructions have two-operand forms that are not
+// available as three-operand forms; convert to the two-operand form where
+// possible.
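+// An illustrative example of the swap case (assumed, not taken from the
+// original comment): in Thumb1, 'ands r0, r1, r0' names r0 as both the
+// destination and the last source, so the last two operands are swapped and
+// the redundant register is dropped, giving 'ands r0, r1'.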
+//
+// FIXME: We would really like to be able to tablegen'erate this.
+void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic,
+ bool CarrySetting,
+ OperandVector &Operands) {
+ if (Operands.size() != 6)
+ return;
+
+ const auto &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ auto &Op4 = static_cast<ARMOperand &>(*Operands[4]);
+ if (!Op3.isReg() || !Op4.isReg())
+ return;
+
+ auto Op3Reg = Op3.getReg();
+ auto Op4Reg = Op4.getReg();
+
+ // For most Thumb2 cases we just generate the 3 operand form and reduce
+ // it in processInstruction(), but the 3 operand form of ADD (t2ADDrr)
+ // won't accept SP or PC, so we do the transformation here, taking care
+ // with the immediate range in the 'add sp, sp, #imm' case.
+ auto &Op5 = static_cast<ARMOperand &>(*Operands[5]);
+ if (isThumbTwo()) {
+ if (Mnemonic != "add")
+ return;
+ bool TryTransform = Op3Reg == ARM::PC || Op4Reg == ARM::PC ||
+ (Op5.isReg() && Op5.getReg() == ARM::PC);
+ if (!TryTransform) {
+ TryTransform = (Op3Reg == ARM::SP || Op4Reg == ARM::SP ||
+ (Op5.isReg() && Op5.getReg() == ARM::SP)) &&
+ !(Op3Reg == ARM::SP && Op4Reg == ARM::SP &&
+ Op5.isImm() && !Op5.isImm0_508s4());
+ }
+ if (!TryTransform)
+ return;
+ } else if (!isThumbOne())
+ return;
+
+ if (!(Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||
+ Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||
+ Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic"))
+ return;
+
+ // If the first two operands of a three-operand instruction are the same,
+ // transform it to the two-operand version of the same instruction,
+ // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'.
+ bool Transform = Op3Reg == Op4Reg;
+
+ // For commutative operations, we might be able to transform if we swap
+ // Op4 and Op5. The 'ADD Rdm, SP, Rdm' form is already handled specially
+ // as tADDrsp.
+ const ARMOperand *LastOp = &Op5;
+ bool Swap = false;
+ if (!Transform && Op5.isReg() && Op3Reg == Op5.getReg() &&
+ ((Mnemonic == "add" && Op4Reg != ARM::SP) ||
+ Mnemonic == "and" || Mnemonic == "eor" ||
+ Mnemonic == "adc" || Mnemonic == "orr")) {
+ Swap = true;
+ LastOp = &Op4;
+ Transform = true;
+ }
+
+ // If both registers are the same then remove one of them from
+ // the operand list, with certain exceptions.
+ if (Transform) {
+ // Don't transform 'adds Rd, Rd, Rm' or 'sub{s} Rd, Rd, Rm' because the
+ // 2 operand forms don't exist.
+ if (((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub") &&
+ LastOp->isReg())
+ Transform = false;
+
+ // Don't transform 'add/sub{s} Rd, Rd, #imm' if the immediate fits into
+ // 3 bits, because the ARMARM says not to.
+ if ((Mnemonic == "add" || Mnemonic == "sub") && LastOp->isImm0_7())
+ Transform = false;
+ }
+
+ if (Transform) {
+ if (Swap)
+ std::swap(Op4, Op5);
+ Operands.erase(Operands.begin() + 3);
+ }
+}
+
bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
OperandVector &Operands) {
// FIXME: This is all horribly hacky. We really need a better way to deal
@@ -5838,6 +5926,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
"VFP/Neon double precision register expected");
}
+ tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands);
+
// Some instructions, mostly Thumb, have forms for the same mnemonic that
// do and don't have a cc_out optional-def operand. With some spot-checks
// of the operand list, we can figure out which variant we're trying to
@@ -5901,48 +5991,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
- // If first 2 operands of a 3 operand instruction are the same
- // then transform to 2 operand version of the same instruction
- // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'
- // FIXME: We would really like to be able to tablegen'erate this.
- if (isThumbOne() && Operands.size() == 6 &&
- (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||
- Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
- Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||
- Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) {
- ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
- ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]);
- ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]);
-
- // If both registers are the same then remove one of them from
- // the operand list.
- if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) {
- // If 3rd operand (variable Op5) is a register and the instruction is adds/sub
- // then do not transform as the backend already handles this instruction
- // correctly.
- if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) {
- Operands.erase(Operands.begin() + 3);
- if (Mnemonic == "add" && !CarrySetting) {
- // Special case for 'add' (not 'adds') instruction must
- // remove the CCOut operand as well.
- Operands.erase(Operands.begin() + 1);
- }
- }
- }
- }
-
- // If instruction is 'add' and first two register operands
- // use SP register, then remove one of the SP registers from
- // the instruction.
- // FIXME: We would really like to be able to tablegen'erate this.
- if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) {
- ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
- ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
- if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) {
- Operands.erase(Operands.begin() + 2);
- }
- }
-
// GNU Assembler extension (compatibility)
if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
@@ -5985,8 +6033,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// return 'true' if register list contains non-low GPR registers,
// 'false' otherwise. If Reg is in the register list or is HiReg, set
// 'containsReg' to true.
-static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
- unsigned HiReg, bool &containsReg) {
+static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo,
+ unsigned Reg, unsigned HiReg,
+ bool &containsReg) {
containsReg = false;
for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
unsigned OpReg = Inst.getOperand(i).getReg();
@@ -6001,8 +6050,8 @@ static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
// Check if the specified register is in the register list of the inst,
// starting at the indicated operand number.
-static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
- for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+static bool listContainsReg(const MCInst &Inst, unsigned OpNo, unsigned Reg) {
+ for (unsigned i = OpNo, e = Inst.getNumOperands(); i < e; ++i) {
unsigned OpReg = Inst.getOperand(i).getReg();
if (OpReg == Reg)
return true;
@@ -6020,7 +6069,7 @@ static bool instIsBreakpoint(const MCInst &Inst) {
}
-bool ARMAsmParser::validatetLDMRegList(MCInst Inst,
+bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst,
const OperandVector &Operands,
unsigned ListNo, bool IsARPop) {
const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
@@ -6043,7 +6092,7 @@ bool ARMAsmParser::validatetLDMRegList(MCInst Inst,
return false;
}
-bool ARMAsmParser::validatetSTMRegList(MCInst Inst,
+bool ARMAsmParser::validatetSTMRegList(const MCInst &Inst,
const OperandVector &Operands,
unsigned ListNo) {
const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]);
@@ -8167,8 +8216,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// If the destination and first source operand are the same, and
// there's no setting of the flags, use encoding T2 instead of T3.
// Note that this is only for ADD, not SUB. This mirrors the system
- // 'as' behaviour. Make sure the wide encoding wasn't explicit.
- if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ // 'as' behaviour. Also take advantage of ADD being commutative.
+ // Make sure the wide encoding wasn't explicit.
+ bool Swap = false;
+ auto DestReg = Inst.getOperand(0).getReg();
+ bool Transform = DestReg == Inst.getOperand(1).getReg();
+ if (!Transform && DestReg == Inst.getOperand(2).getReg()) {
+ Transform = true;
+ Swap = true;
+ }
+ if (!Transform ||
Inst.getOperand(5).getReg() != 0 ||
(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
@@ -8177,7 +8234,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.setOpcode(ARM::tADDhirr);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
- TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(Swap ? 1 : 2));
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
@@ -9176,8 +9233,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
return false;
}
- STI.InitMCProcessorInfo(CPU, "");
- STI.InitCPUSchedModel(CPU);
+ STI.setDefaultFeatures(CPU);
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
return false;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 4d12bfb5d60f..d17fdb95dbdf 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -1362,7 +1362,7 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
- if (TT.getObjectFormat() == Triple::ELF)
+ if (TT.isOSBinFormatELF())
return new ARMTargetELFStreamer(S);
return new ARMTargetStreamer(S);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index fafe25ae5be5..21c9fc1e58b2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "ARMGenRegisterInfo.inc"
-static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
(MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
@@ -63,7 +63,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
return false;
}
-static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&
MI.getOperand(1).getImm() != 8) {
@@ -75,7 +75,7 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
return false;
}
-static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
"cannot predicate thumb instructions");
@@ -92,7 +92,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
return false;
}
-static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
"cannot predicate thumb instructions");
@@ -257,9 +257,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
ArchFS = FS;
}
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);
- return X;
+ return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS);
}
static MCInstrInfo *createARMMCInstrInfo() {
@@ -268,7 +266,7 @@ static MCInstrInfo *createARMMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
+static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
return X;
@@ -279,10 +277,10 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
MAI = new ARMMCAsmInfoDarwin(TheTriple);
- else if (TheTriple.isWindowsItaniumEnvironment())
- MAI = new ARMCOFFMCAsmInfoGNU();
else if (TheTriple.isWindowsMSVCEnvironment())
MAI = new ARMCOFFMCAsmInfoMicrosoft();
+ else if (TheTriple.isOSWindows())
+ MAI = new ARMCOFFMCAsmInfoGNU();
else
MAI = new ARMELFMCAsmInfo(TheTriple);
@@ -292,14 +290,13 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
- Triple TheTriple(TT);
// Default relocation model on Darwin is PIC, not DynamicNoPIC.
- RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
+ RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
}
X->initMCCodeGenInfo(RM, CM, OL);
return X;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 77cd890e4cad..3b4358b5d9bf 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -365,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
// frame pointer stack slot, the target is ELF and the function has FP, or
// the target uses var sized objects.
if (NumBytes) {
- assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);
diff --git a/lib/Target/BPF/BPFFrameLowering.cpp b/lib/Target/BPF/BPFFrameLowering.cpp
index 54c5ececc7de..c2806c85f24f 100644
--- a/lib/Target/BPF/BPFFrameLowering.cpp
+++ b/lib/Target/BPF/BPFFrameLowering.cpp
@@ -29,12 +29,12 @@ void BPFFrameLowering::emitPrologue(MachineFunction &MF,
void BPFFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {}
-void BPFFrameLowering::processFunctionBeforeCalleeSavedScan(
- MachineFunction &MF, RegScavenger *RS) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- MRI.setPhysRegUnused(BPF::R6);
- MRI.setPhysRegUnused(BPF::R7);
- MRI.setPhysRegUnused(BPF::R8);
- MRI.setPhysRegUnused(BPF::R9);
+void BPFFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+ SavedRegs.reset(BPF::R6);
+ SavedRegs.reset(BPF::R7);
+ SavedRegs.reset(BPF::R8);
+ SavedRegs.reset(BPF::R9);
}
diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h
index 3b9fc443e053..251cda965ff5 100644
--- a/lib/Target/BPF/BPFFrameLowering.h
+++ b/lib/Target/BPF/BPFFrameLowering.h
@@ -28,8 +28,8 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
void
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index 38c56bbef81e..58498a1aec7d 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -302,8 +302,9 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getContext()->diagnose(Err);
}
+ auto PtrVT = getPointerTy(MF.getDataLayout());
Chain = DAG.getCALLSEQ_START(
- Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true), CLI.DL);
+ Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), CLI.DL);
SmallVector<std::pair<unsigned, SDValue>, 5> RegsToPass;
@@ -350,10 +351,10 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy(),
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
G->getOffset(), 0);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy(), 0);
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -374,8 +375,8 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(
- Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true),
- DAG.getConstant(0, CLI.DL, getPointerTy(), true), InFlag, CLI.DL);
+ Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true),
+ DAG.getConstant(0, CLI.DL, PtrVT, true), InFlag, CLI.DL);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
diff --git a/lib/Target/BPF/BPFSubtarget.cpp b/lib/Target/BPF/BPFSubtarget.cpp
index 65acd585116d..c3a8b1caa63d 100644
--- a/lib/Target/BPF/BPFSubtarget.cpp
+++ b/lib/Target/BPF/BPFSubtarget.cpp
@@ -28,4 +28,4 @@ void BPFSubtarget::anchor() {}
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: BPFGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this),
- TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {}
+ TLInfo(TM, *this) {}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 3e928fc93a37..840570ebc392 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -40,7 +40,7 @@ static MCInstrInfo *createBPFMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createBPFMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitBPFMCRegisterInfo(X, BPF::R11 /* RAReg doesn't exist */);
return X;
@@ -48,12 +48,10 @@ static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitBPFMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createBPFMCSubtargetInfoImpl(TT, CPU, FS);
}
-static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createBPFMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index bc5d7f65b2f6..272688edb8a1 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -2148,8 +2148,8 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitFile(
PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType,
- bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter,
- MachineFunctionInitializer *MFInitializer) {
+ bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
+ AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) {
if (FileType != TargetMachine::CGFT_AssemblyFile)
return true;
auto FOut = llvm::make_unique<formatted_raw_ostream>(o);
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index ebf0635b12e4..00e402feffbc 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -31,7 +31,8 @@ struct CPPTargetMachine : public TargetMachine {
public:
bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
CodeGenFileType FileType, bool DisableVerify,
- AnalysisID StartAfter, AnalysisID StopAfter,
+ AnalysisID StartBefore, AnalysisID StartAfter,
+ AnalysisID StopAfter,
MachineFunctionInitializer *MFInitializer) override;
};
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
new file mode 100644
index 000000000000..cb7e633fb82f
--- /dev/null
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -0,0 +1,1127 @@
+//===--- BitTracker.cpp ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// SSA-based bit propagation.
+//
+// The purpose of this code is, for a given virtual register, to provide
+// information about the value of each bit in the register. The values
+// of bits are represented by the class BitValue, and take one of four
+// cases: 0, 1, "ref" and "bottom". The 0 and 1 are rather clear, the
+// "ref" value means that the bit is a copy of another bit (which itself
+// cannot be a copy of yet another bit---such chains are not allowed).
+// A "ref" value is associated with a BitRef structure, which indicates
+// which virtual register, and which bit in that register is the origin
+// of the value. For example, given an instruction
+// vreg2 = ASL vreg1, 1
+// assuming that nothing is known about bits of vreg1, bit 1 of vreg2
+// will be a "ref" to (vreg1, 0). If there is a subsequent instruction
+// vreg3 = ASL vreg2, 2
+// then bit 3 of vreg3 will be a "ref" to (vreg1, 0) as well.
+// The "bottom" case means that the bit's value cannot be determined,
+// and that this virtual register actually defines it. The "bottom" case
+// is discussed in detail in BitTracker.h. In fact, "bottom" is a "ref
+// to self", so for the vreg1 above, the bit 0 of it will be a "ref" to
+// (vreg1, 0), bit 1 will be a "ref" to (vreg1, 1), etc.
+//
+// The tracker implements the Wegman-Zadeck algorithm, originally developed
+// for SSA-based constant propagation. Each register is represented as
+// a sequence of bits, with the convention that bit 0 is the least
+// significant bit. Each bit is propagated individually. The class RegisterCell
+// implements the register's representation, and is also the subject of
+// the lattice operations in the tracker.
+//
+// The intended usage of the bit tracker is to create a target-specific
+// machine instruction evaluator, pass the evaluator to the BitTracker
+// object, and run the tracker. The tracker will then collect the bit
+// value information for a given machine function. After that, it can be
+// queried for the cells for each virtual register.
+// Sample code:
+// const TargetSpecificEvaluator TSE(TRI, MRI);
+// BitTracker BT(TSE, MF);
+// BT.run();
+// ...
+// unsigned Reg = interestingRegister();
+// RegisterCell RC = BT.get(Reg);
+// if (RC[3].is(1))
+// Reg0bit3 = 1;
+//
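+// A target-specific evaluator typically dispatches on the opcode and
+// combines the helper functions provided by MachineEvaluator. A minimal,
+// assumed sketch (the opcode name MyTarget::ADDrr is made up purely for
+// illustration):
+//   bool MyEvaluator::evaluate(const MachineInstr *MI,
+//                              const CellMapType &Inputs,
+//                              CellMapType &Outputs) const {
+//     switch (MI->getOpcode()) {
+//     case MyTarget::ADDrr: {
+//       RegisterRef RD = MI->getOperand(0);
+//       RegisterCell A = getCell(MI->getOperand(1), Inputs);
+//       RegisterCell B = getCell(MI->getOperand(2), Inputs);
+//       putCell(RD, eADD(A, B), Outputs);
+//       return true;
+//     }
+//     default:
+//       return MachineEvaluator::evaluate(MI, Inputs, Outputs);
+//     }
+//   }
+//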
+// The code below is intended to be fully target-independent.
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include "BitTracker.h"
+
+using namespace llvm;
+
+typedef BitTracker BT;
+
+namespace {
+ // Local trickery to pretty print a register (without the whole "%vreg"
+ // business).
+ struct printv {
+ printv(unsigned r) : R(r) {}
+ unsigned R;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
+ if (PV.R)
+ OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
+ else
+ OS << 's';
+ return OS;
+ }
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::BitValue &BV) {
+ switch (BV.Type) {
+ case BT::BitValue::Top:
+ OS << 'T';
+ break;
+ case BT::BitValue::Zero:
+ OS << '0';
+ break;
+ case BT::BitValue::One:
+ OS << '1';
+ break;
+ case BT::BitValue::Ref:
+ OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']';
+ break;
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::RegisterCell &RC) {
+ unsigned n = RC.Bits.size();
+ OS << "{ w:" << n;
+ // Instead of printing each bit value individually, try to group them
+ // into logical segments, such as sequences of 0 or 1 bits or references
+ // to consecutive bits (e.g. "bits 3-5 are the same as bits 7-9 of reg xyz").
+ // "Start" will be the index of the beginning of the most recent segment.
+ unsigned Start = 0;
+ bool SeqRef = false; // A sequence of refs to consecutive bits.
+ bool ConstRef = false; // A sequence of refs to the same bit.
+
+ for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) {
+ const BT::BitValue &V = RC[i];
+ const BT::BitValue &SV = RC[Start];
+ bool IsRef = (V.Type == BT::BitValue::Ref);
+ // If the current value is the same as Start, skip to the next one.
+ if (!IsRef && V == SV)
+ continue;
+ if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) {
+ if (Start+1 == i) {
+ SeqRef = (V.RefI.Pos == SV.RefI.Pos+1);
+ ConstRef = (V.RefI.Pos == SV.RefI.Pos);
+ }
+ if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start))
+ continue;
+ if (ConstRef && V.RefI.Pos == SV.RefI.Pos)
+ continue;
+ }
+
+ // The current value is different. Print the previous one and reset
+ // the Start.
+ OS << " [" << Start;
+ unsigned Count = i - Start;
+ if (Count == 1) {
+ OS << "]:" << SV;
+ } else {
+ OS << '-' << i-1 << "]:";
+ if (SV.Type == BT::BitValue::Ref && SeqRef)
+ OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+ << SV.RefI.Pos+(Count-1) << ']';
+ else
+ OS << SV;
+ }
+ Start = i;
+ SeqRef = ConstRef = false;
+ }
+
+ OS << " [" << Start;
+ unsigned Count = n - Start;
+ if (n-Start == 1) {
+ OS << "]:" << RC[Start];
+ } else {
+ OS << '-' << n-1 << "]:";
+ const BT::BitValue &SV = RC[Start];
+ if (SV.Type == BT::BitValue::Ref && SeqRef)
+ OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+ << SV.RefI.Pos+(Count-1) << ']';
+ else
+ OS << SV;
+ }
+ OS << " }";
+
+ return OS;
+}
+
+BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
+ : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
+
+BitTracker::~BitTracker() {
+ delete &Map;
+}
+
+
+// If we were allowed to update a cell for a part of a register, the meet
+// operation would need to be parametrized by the register number and the
+// exact part of the register, so that the computed BitRefs correspond to
+// the actual bits of the "self" register.
+// While this cannot happen in the current implementation, I'm not sure
+// if this should be ruled out in the future.
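+// As a rough intuition only (the precise rules live in BitTracker.h):
+// meeting an unknown ("top") bit with a known bit keeps the known bit,
+// while meeting two conflicting values lowers the bit to "bottom", i.e. a
+// ref to the corresponding bit of SelfR.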
+bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
+ // An example when "meet" can be invoked with SelfR == 0 is a phi node
+ // with a physical register as an operand.
+ assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR));
+ bool Changed = false;
+ for (uint16_t i = 0, n = Bits.size(); i < n; ++i) {
+ const BitValue &RCV = RC[i];
+ Changed |= Bits[i].meet(RCV, BitRef(SelfR, i));
+ }
+ return Changed;
+}
+
+
+// Insert the entire cell RC into the current cell at position given by M.
+BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
+ const BitMask &M) {
+ uint16_t B = M.first(), E = M.last(), W = width();
+ // Sanity: M must be a valid mask for *this.
+ assert(B < W && E < W);
+ // Sanity: the masked part of *this must have the same number of bits
+ // as the source.
+ assert(B > E || E-B+1 == RC.width()); // B <= E => E-B+1 = |RC|.
+ assert(B <= E || E+(W-B)+1 == RC.width()); // E < B => E+(W-B)+1 = |RC|.
+ if (B <= E) {
+ for (uint16_t i = 0; i <= E-B; ++i)
+ Bits[i+B] = RC[i];
+ } else {
+ for (uint16_t i = 0; i < W-B; ++i)
+ Bits[i+B] = RC[i];
+ for (uint16_t i = 0; i <= E; ++i)
+ Bits[i] = RC[i+(W-B)];
+ }
+ return *this;
+}
+
+
+BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
+ uint16_t B = M.first(), E = M.last(), W = width();
+ assert(B < W && E < W);
+ if (B <= E) {
+ RegisterCell RC(E-B+1);
+ for (uint16_t i = B; i <= E; ++i)
+ RC.Bits[i-B] = Bits[i];
+ return RC;
+ }
+
+ RegisterCell RC(E+(W-B)+1);
+ for (uint16_t i = 0; i < W-B; ++i)
+ RC.Bits[i] = Bits[i+B];
+ for (uint16_t i = 0; i <= E; ++i)
+ RC.Bits[i+(W-B)] = Bits[i];
+ return RC;
+}
+
+
+BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
+ // Rotate left (i.e. towards increasing bit indices).
+ // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1]
+ uint16_t W = width();
+ Sh = Sh % W;
+ if (Sh == 0)
+ return *this;
+
+ RegisterCell Tmp(W-Sh);
+ // Tmp = [0..W-Sh-1].
+ for (uint16_t i = 0; i < W-Sh; ++i)
+ Tmp[i] = Bits[i];
+ // Shift [W-Sh..W-1] to [0..Sh-1].
+ for (uint16_t i = 0; i < Sh; ++i)
+ Bits[i] = Bits[W-Sh+i];
+ // Copy Tmp to [Sh..W-1].
+ for (uint16_t i = 0; i < W-Sh; ++i)
+ Bits[i+Sh] = Tmp.Bits[i];
+ return *this;
+}
+
+
+BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
+ const BitValue &V) {
+ assert(B <= E);
+ while (B < E)
+ Bits[B++] = V;
+ return *this;
+}
+
+
+BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
+ // Append the cell given as the argument to the "this" cell.
+ // Bit 0 of RC becomes bit W of the result, where W is this->width().
+ uint16_t W = width(), WRC = RC.width();
+ Bits.resize(W+WRC);
+ for (uint16_t i = 0; i < WRC; ++i)
+ Bits[i+W] = RC.Bits[i];
+ return *this;
+}
+
+
+uint16_t BT::RegisterCell::ct(bool B) const {
+ uint16_t W = width();
+ uint16_t C = 0;
+ BitValue V = B;
+ while (C < W && Bits[C] == V)
+ C++;
+ return C;
+}
+
+
+uint16_t BT::RegisterCell::cl(bool B) const {
+ uint16_t W = width();
+ uint16_t C = 0;
+ BitValue V = B;
+ while (C < W && Bits[W-(C+1)] == V)
+ C++;
+ return C;
+}
+
+
+bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
+ uint16_t W = Bits.size();
+ if (RC.Bits.size() != W)
+ return false;
+ for (uint16_t i = 0; i < W; ++i)
+ if (Bits[i] != RC[i])
+ return false;
+ return true;
+}
+
+
+uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
+ // The general problem is with finding a register class that corresponds
+ // to a given reference reg:sub. There can be several such classes, and
+ // since we only care about the register size, it does not matter which
+ // such class we would find.
+ // The easiest way to accomplish what we want is to
+ // 1. find a physical register PhysR from the same class as RR.Reg,
+ // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub,
+ // 3. find a register class that contains PhysS.
+ unsigned PhysR;
+ if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) {
+ const TargetRegisterClass *VC = MRI.getRegClass(RR.Reg);
+ assert(VC->begin() != VC->end() && "Empty register class");
+ PhysR = *VC->begin();
+ } else {
+ assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg));
+ PhysR = RR.Reg;
+ }
+
+ unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub);
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS);
+ uint16_t BW = RC->getSize()*8;
+ return BW;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
+ const CellMapType &M) const {
+ uint16_t BW = getRegBitWidth(RR);
+
+ // Physical registers are assumed to be present in the map with an unknown
+ // value. Don't actually insert anything in the map, just return the cell.
+ if (TargetRegisterInfo::isPhysicalRegister(RR.Reg))
+ return RegisterCell::self(0, BW);
+
+ assert(TargetRegisterInfo::isVirtualRegister(RR.Reg));
+ // For virtual registers that belong to a class that is not tracked,
+ // generate an "unknown" value as well.
+ const TargetRegisterClass *C = MRI.getRegClass(RR.Reg);
+ if (!track(C))
+ return RegisterCell::self(0, BW);
+
+ CellMapType::const_iterator F = M.find(RR.Reg);
+ if (F != M.end()) {
+ if (!RR.Sub)
+ return F->second;
+ BitMask M = mask(RR.Reg, RR.Sub);
+ return F->second.extract(M);
+ }
+ // If not found, create a "top" entry, but do not insert it in the map.
+ return RegisterCell::top(BW);
+}
+
+
+void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
+ CellMapType &M) const {
+ // While updating the cell map can be done in a meaningful way for
+ // a part of a register, it makes little sense to implement it as the
+ // SSA representation would never contain such "partial definitions".
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return;
+ assert(RR.Sub == 0 && "Unexpected sub-register in definition");
+ // Eliminate all ref-to-reg-0 bit values: replace them with "self".
+ for (unsigned i = 0, n = RC.width(); i < n; ++i) {
+ const BitValue &V = RC[i];
+ if (V.Type == BitValue::Ref && V.RefI.Reg == 0)
+ RC[i].RefI = BitRef(RR.Reg, i);
+ }
+ M[RR.Reg] = RC;
+}
+
+
+// Check if the cell represents a compile-time integer value.
+bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
+ uint16_t W = A.width();
+ for (uint16_t i = 0; i < W; ++i)
+ if (!A[i].is(0) && !A[i].is(1))
+ return false;
+ return true;
+}
+
+
+// Convert a cell to the integer value. The result must fit in uint64_t.
+uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
+ assert(isInt(A));
+ uint64_t Val = 0;
+ uint16_t W = A.width();
+ for (uint16_t i = 0; i < W; ++i) {
+ Val <<= 1;
+ Val |= A[i].is(1);
+ }
+ return Val;
+}
+
+
+// Evaluator helper functions. These implement some common operation on
+// register cells that can be used to implement target-specific instructions
+// in a target-specific evaluator.
+
+BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const {
+ RegisterCell Res(W);
+ // For bits beyond the 63rd, this will generate the sign bit of V.
+ for (uint16_t i = 0; i < W; ++i) {
+ Res[i] = BitValue(V & 1);
+ V >>= 1;
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
+ APInt A = CI->getValue();
+ uint16_t BW = A.getBitWidth();
+ assert((unsigned)BW == A.getBitWidth() && "BitWidth overflow");
+ RegisterCell Res(BW);
+ for (uint16_t i = 0; i < BW; ++i)
+ Res[i] = A[i];
+ return Res;
+}
+
+
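+// Illustrative trace (an assumed example, not taken from the source): for
+//   A1 = { bit0:1, bit1:0 }             (the constant 1)
+//   A2 = { bit0:0, bit1:ref(vreg1,0) }  (low bit known zero, high bit unknown)
+// the constant loop computes Res[0] = 1 and leaves the carry clear; the
+// second loop then sees that A1's bit 1 matches the (clear) carry and copies
+// the other operand's bit, so Res[1] = ref(vreg1,0).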
+BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ bool Carry = false;
+ uint16_t I;
+ for (I = 0; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+ if (!V1.num() || !V2.num())
+ break;
+ unsigned S = bool(V1) + bool(V2) + Carry;
+ Res[I] = BitValue(S & 1);
+ Carry = (S > 1);
+ }
+ for (; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+ // If the next bit is the same as Carry, the result will be 0 plus the
+ // other bit. The Carry bit will remain unchanged.
+ if (V1.is(Carry))
+ Res[I] = BitValue::ref(V2);
+ else if (V2.is(Carry))
+ Res[I] = BitValue::ref(V1);
+ else
+ break;
+ }
+ for (; I < W; ++I)
+ Res[I] = BitValue::self();
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ bool Borrow = false;
+ uint16_t I;
+ for (I = 0; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+ if (!V1.num() || !V2.num())
+ break;
+ unsigned S = bool(V1) - bool(V2) - Borrow;
+ Res[I] = BitValue(S & 1);
+ Borrow = (S > 1);
+ }
+ for (; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+ if (V1.is(Borrow)) {
+ Res[I] = BitValue::ref(V2);
+ break;
+ }
+ if (V2.is(Borrow))
+ Res[I] = BitValue::ref(V1);
+ else
+ break;
+ }
+ for (; I < W; ++I)
+ Res[I] = BitValue::self();
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width() + A2.width();
+ uint16_t Z = A1.ct(0) + A2.ct(0);
+ RegisterCell Res(W);
+ Res.fill(0, Z, BitValue::Zero);
+ Res.fill(Z, W, BitValue::self());
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width() + A2.width();
+ uint16_t Z = A1.ct(0) + A2.ct(0);
+ RegisterCell Res(W);
+ Res.fill(0, Z, BitValue::Zero);
+ Res.fill(Z, W, BitValue::self());
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
+ uint16_t Sh) const {
+ assert(Sh <= A1.width());
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res.rol(Sh);
+ Res.fill(0, Sh, BitValue::Zero);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
+ uint16_t Sh) const {
+ uint16_t W = A1.width();
+ assert(Sh <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res.rol(W-Sh);
+ Res.fill(W-Sh, W, BitValue::Zero);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
+ uint16_t Sh) const {
+ uint16_t W = A1.width();
+ assert(Sh <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ BitValue Sign = Res[W-1];
+ Res.rol(W-Sh);
+ Res.fill(W-Sh, W, Sign);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V1 = A1[i];
+ const BitValue &V2 = A2[i];
+ if (V1.is(1))
+ Res[i] = BitValue::ref(V2);
+ else if (V2.is(1))
+ Res[i] = BitValue::ref(V1);
+ else if (V1.is(0) || V2.is(0))
+ Res[i] = BitValue::Zero;
+ else if (V1 == V2)
+ Res[i] = V1;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V1 = A1[i];
+ const BitValue &V2 = A2[i];
+ if (V1.is(1) || V2.is(1))
+ Res[i] = BitValue::One;
+ else if (V1.is(0))
+ Res[i] = BitValue::ref(V2);
+ else if (V2.is(0))
+ Res[i] = BitValue::ref(V1);
+ else if (V1 == V2)
+ Res[i] = V1;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V1 = A1[i];
+ const BitValue &V2 = A2[i];
+ if (V1.is(0))
+ Res[i] = BitValue::ref(V2);
+ else if (V2.is(0))
+ Res[i] = BitValue::ref(V1);
+ else if (V1 == V2)
+ Res[i] = BitValue::Zero;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
+ uint16_t W = A1.width();
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V = A1[i];
+ if (V.is(0))
+ Res[i] = BitValue::One;
+ else if (V.is(1))
+ Res[i] = BitValue::Zero;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
+ uint16_t BitN) const {
+ assert(BitN < A1.width());
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res[BitN] = BitValue::One;
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
+ uint16_t BitN) const {
+ assert(BitN < A1.width());
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res[BitN] = BitValue::Zero;
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
+ uint16_t W) const {
+ uint16_t C = A1.cl(B), AW = A1.width();
+ // If the first bit past the run of leading B bits is not a constant, then
+ // we don't know the real count.
+ if ((C < AW && A1[AW-1-C].num()) || C == AW)
+ return eIMM(C, W);
+ return RegisterCell::self(0, W);
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
+ uint16_t W) const {
+ uint16_t C = A1.ct(B), AW = A1.width();
+ // If the first bit past the run of trailing B bits is not a constant, then
+ // we don't know the real count.
+ if ((C < AW && A1[C].num()) || C == AW)
+ return eIMM(C, W);
+ return RegisterCell::self(0, W);
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
+ uint16_t FromN) const {
+ uint16_t W = A1.width();
+ assert(FromN <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ BitValue Sign = Res[FromN-1];
+ // Sign-extend "inreg".
+ Res.fill(FromN, W, Sign);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
+ uint16_t FromN) const {
+ uint16_t W = A1.width();
+ assert(FromN <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res.fill(FromN, W, BitValue::Zero);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
+ uint16_t B, uint16_t E) const {
+ uint16_t W = A1.width();
+ assert(B < W && E <= W);
+ if (B == E)
+ return RegisterCell(0);
+ uint16_t Last = (E > 0) ? E-1 : W-1;
+ RegisterCell Res = RegisterCell::ref(A1).extract(BT::BitMask(B, Last));
+ // Return shorter cell.
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
+ const RegisterCell &A2, uint16_t AtN) const {
+ uint16_t W1 = A1.width(), W2 = A2.width();
+ (void)W1;
+ assert(AtN < W1 && AtN+W2 <= W1);
+ // Copy bits from A1, insert A2 at position AtN.
+ RegisterCell Res = RegisterCell::ref(A1);
+ if (W2 > 0)
+ Res.insert(RegisterCell::ref(A2), BT::BitMask(AtN, AtN+W2-1));
+ return Res;
+}
+
+
+BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
+ assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
+ uint16_t W = getRegBitWidth(Reg);
+ assert(W > 0 && "Cannot generate mask for empty register");
+ return BitMask(0, W-1);
+}
+
+
+bool BT::MachineEvaluator::evaluate(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::REG_SEQUENCE: {
+ RegisterRef RD = MI->getOperand(0);
+ assert(RD.Sub == 0);
+ RegisterRef RS = MI->getOperand(1);
+ unsigned SS = MI->getOperand(2).getImm();
+ RegisterRef RT = MI->getOperand(3);
+ unsigned ST = MI->getOperand(4).getImm();
+ assert(SS != ST);
+
+ uint16_t W = getRegBitWidth(RD);
+ RegisterCell Res(W);
+ Res.insert(RegisterCell::ref(getCell(RS, Inputs)), mask(RD.Reg, SS));
+ Res.insert(RegisterCell::ref(getCell(RT, Inputs)), mask(RD.Reg, ST));
+ putCell(RD, Res, Outputs);
+ break;
+ }
+
+ case TargetOpcode::COPY: {
+ // COPY can transfer a smaller register into a wider one.
+ // If that is the case, fill the remaining high bits with 0.
+ RegisterRef RD = MI->getOperand(0);
+ RegisterRef RS = MI->getOperand(1);
+ assert(RD.Sub == 0);
+ uint16_t WD = getRegBitWidth(RD);
+ uint16_t WS = getRegBitWidth(RS);
+ assert(WD >= WS);
+ RegisterCell Src = getCell(RS, Inputs);
+ RegisterCell Res(WD);
+ Res.insert(Src, BitMask(0, WS-1));
+ Res.fill(WS, WD, BitValue::Zero);
+ putCell(RD, Res, Outputs);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+
+// Main W-Z implementation.
+
+void BT::visitPHI(const MachineInstr *PI) {
+ int ThisN = PI->getParent()->getNumber();
+ if (Trace)
+ dbgs() << "Visit FI(BB#" << ThisN << "): " << *PI;
+
+ const MachineOperand &MD = PI->getOperand(0);
+ assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition");
+ RegisterRef DefRR(MD);
+ uint16_t DefBW = ME.getRegBitWidth(DefRR);
+
+ RegisterCell DefC = ME.getCell(DefRR, Map);
+ if (DefC == RegisterCell::self(DefRR.Reg, DefBW)) // XXX slow
+ return;
+
+ bool Changed = false;
+
+ for (unsigned i = 1, n = PI->getNumOperands(); i < n; i += 2) {
+ const MachineBasicBlock *PB = PI->getOperand(i+1).getMBB();
+ int PredN = PB->getNumber();
+ if (Trace)
+ dbgs() << " edge BB#" << PredN << "->BB#" << ThisN;
+ if (!EdgeExec.count(CFGEdge(PredN, ThisN))) {
+ if (Trace)
+ dbgs() << " not executable\n";
+ continue;
+ }
+
+ RegisterRef RU = PI->getOperand(i);
+ RegisterCell ResC = ME.getCell(RU, Map);
+ if (Trace)
+ dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub)
+ << " cell: " << ResC << "\n";
+ Changed |= DefC.meet(ResC, DefRR.Reg);
+ }
+
+ if (Changed) {
+ if (Trace)
+ dbgs() << "Output: " << PrintReg(DefRR.Reg, &ME.TRI, DefRR.Sub)
+ << " cell: " << DefC << "\n";
+ ME.putCell(DefRR, DefC, Map);
+ visitUsesOf(DefRR.Reg);
+ }
+}
+
+
+void BT::visitNonBranch(const MachineInstr *MI) {
+ if (Trace) {
+ int ThisN = MI->getParent()->getNumber();
+ dbgs() << "Visit MI(BB#" << ThisN << "): " << *MI;
+ }
+ if (MI->isDebugValue())
+ return;
+ assert(!MI->isBranch() && "Unexpected branch instruction");
+
+ CellMapType ResMap;
+ bool Eval = ME.evaluate(MI, Map, ResMap);
+
+ if (Trace && Eval) {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ RegisterRef RU(MO);
+ dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub)
+ << " cell: " << ME.getCell(RU, Map) << "\n";
+ }
+ dbgs() << "Outputs:\n";
+ for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end();
+ I != E; ++I) {
+ RegisterRef RD(I->first);
+ dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: "
+ << ME.getCell(RD, ResMap) << "\n";
+ }
+ }
+
+ // Iterate over all definitions of the instruction, and update the
+ // cells accordingly.
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Visit register defs only.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ RegisterRef RD(MO);
+ assert(RD.Sub == 0 && "Unexpected sub-register in definition");
+ if (!TargetRegisterInfo::isVirtualRegister(RD.Reg))
+ continue;
+
+ bool Changed = false;
+ if (!Eval || !ResMap.has(RD.Reg)) {
+ // Set to "ref" (aka "bottom").
+ uint16_t DefBW = ME.getRegBitWidth(RD);
+ RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW);
+ if (RefC != ME.getCell(RD, Map)) {
+ ME.putCell(RD, RefC, Map);
+ Changed = true;
+ }
+ } else {
+ RegisterCell DefC = ME.getCell(RD, Map);
+ RegisterCell ResC = ME.getCell(RD, ResMap);
+ // This is a non-phi instruction, so the values of the inputs come
+ // from the same registers each time this instruction is evaluated.
+ // During the propagation, the values of the inputs can become lowered
+ // in the sense of the lattice operation, which may cause different
+ // results to be calculated in subsequent evaluations. This should
+ // not cause the bottoming of the result in the map, since the new
+ // result is already reflecting the lowered inputs.
+ for (uint16_t i = 0, w = DefC.width(); i < w; ++i) {
+ BitValue &V = DefC[i];
+ // Bits that are already "bottom" should not be updated.
+ if (V.Type == BitValue::Ref && V.RefI.Reg == RD.Reg)
+ continue;
+ // Same for those that are identical in DefC and ResC.
+ if (V == ResC[i])
+ continue;
+ V = ResC[i];
+ Changed = true;
+ }
+ if (Changed)
+ ME.putCell(RD, DefC, Map);
+ }
+ if (Changed)
+ visitUsesOf(RD.Reg);
+ }
+}
+
+
+void BT::visitBranchesFrom(const MachineInstr *BI) {
+ const MachineBasicBlock &B = *BI->getParent();
+ MachineBasicBlock::const_iterator It = BI, End = B.end();
+ BranchTargetList Targets, BTs;
+ bool FallsThrough = true, DefaultToAll = false;
+ int ThisN = B.getNumber();
+
+ do {
+ BTs.clear();
+ const MachineInstr *MI = &*It;
+ if (Trace)
+ dbgs() << "Visit BR(BB#" << ThisN << "): " << *MI;
+ assert(MI->isBranch() && "Expecting branch instruction");
+ InstrExec.insert(MI);
+ bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough);
+ if (!Eval) {
+ // If the evaluation failed, we will add all targets. Keep going in
+ // the loop to mark all executable branches as such.
+ DefaultToAll = true;
+ FallsThrough = true;
+ if (Trace)
+ dbgs() << " failed to evaluate: will add all CFG successors\n";
+ } else if (!DefaultToAll) {
+ // If evaluated successfully add the targets to the cumulative list.
+ if (Trace) {
+ dbgs() << " adding targets:";
+ for (unsigned i = 0, n = BTs.size(); i < n; ++i)
+ dbgs() << " BB#" << BTs[i]->getNumber();
+ if (FallsThrough)
+ dbgs() << "\n falls through\n";
+ else
+ dbgs() << "\n does not fall through\n";
+ }
+ Targets.insert(BTs.begin(), BTs.end());
+ }
+ ++It;
+ } while (FallsThrough && It != End);
+
+ typedef MachineBasicBlock::const_succ_iterator succ_iterator;
+ if (!DefaultToAll) {
+ // Need to add all CFG successors that lead to EH landing pads.
+ // There won't be explicit branches to these blocks, but they must
+ // be processed.
+ for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) {
+ const MachineBasicBlock *SB = *I;
+ if (SB->isLandingPad())
+ Targets.insert(SB);
+ }
+ if (FallsThrough) {
+ MachineFunction::const_iterator BIt = &B;
+ MachineFunction::const_iterator Next = std::next(BIt);
+ if (Next != MF.end())
+ Targets.insert(&*Next);
+ }
+ } else {
+ for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I)
+ Targets.insert(*I);
+ }
+
+ for (unsigned i = 0, n = Targets.size(); i < n; ++i) {
+ int TargetN = Targets[i]->getNumber();
+ FlowQ.push(CFGEdge(ThisN, TargetN));
+ }
+}
+
+
+void BT::visitUsesOf(unsigned Reg) {
+ if (Trace)
+ dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n";
+
+ typedef MachineRegisterInfo::use_nodbg_iterator use_iterator;
+ use_iterator End = MRI.use_nodbg_end();
+ for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) {
+ MachineInstr *UseI = I->getParent();
+ if (!InstrExec.count(UseI))
+ continue;
+ if (UseI->isPHI())
+ visitPHI(UseI);
+ else if (!UseI->isBranch())
+ visitNonBranch(UseI);
+ else
+ visitBranchesFrom(UseI);
+ }
+}
+
+
+BT::RegisterCell BT::get(RegisterRef RR) const {
+ return ME.getCell(RR, Map);
+}
+
+
+void BT::put(RegisterRef RR, const RegisterCell &RC) {
+ ME.putCell(RR, RC, Map);
+}
+
+
+// Replace all references to bits from OldRR with the corresponding bits
+// in NewRR.
+void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
+ assert(Map.has(OldRR.Reg) && "OldRR not present in map");
+ BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub);
+ BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub);
+ uint16_t OMB = OM.first(), OME = OM.last();
+ uint16_t NMB = NM.first(), NME = NM.last();
+ (void)NME;
+ assert((OME-OMB == NME-NMB) &&
+ "Substituting registers of different lengths");
+ for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
+ RegisterCell &RC = I->second;
+ for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
+ BitValue &V = RC[i];
+ if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg)
+ continue;
+ if (V.RefI.Pos < OMB || V.RefI.Pos > OME)
+ continue;
+ V.RefI.Reg = NewRR.Reg;
+ V.RefI.Pos += NMB-OMB;
+ }
+ }
+}
+
+
+// Check if the block has been "executed" during propagation. (If not, the
+// block is dead, but it may still appear to be reachable.)
+bool BT::reached(const MachineBasicBlock *B) const {
+ int BN = B->getNumber();
+ assert(BN >= 0);
+ for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end();
+ I != E; ++I) {
+ if (I->second == BN)
+ return true;
+ }
+ return false;
+}
+
+
+void BT::reset() {
+ EdgeExec.clear();
+ InstrExec.clear();
+ Map.clear();
+}
+
+
+void BT::run() {
+ reset();
+ assert(FlowQ.empty());
+
+ typedef GraphTraits<const MachineFunction*> MachineFlowGraphTraits;
+ const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF);
+
+ unsigned MaxBN = 0;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ assert(I->getNumber() >= 0 && "Disconnected block");
+ unsigned BN = I->getNumber();
+ if (BN > MaxBN)
+ MaxBN = BN;
+ }
+
+ // Keep track of visited blocks.
+ BitVector BlockScanned(MaxBN+1);
+
+ int EntryN = Entry->getNumber();
+ // Generate a fake edge to get something to start with.
+ FlowQ.push(CFGEdge(-1, EntryN));
+
+ while (!FlowQ.empty()) {
+ CFGEdge Edge = FlowQ.front();
+ FlowQ.pop();
+
+ if (EdgeExec.count(Edge))
+ continue;
+ EdgeExec.insert(Edge);
+
+ const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second);
+ MachineBasicBlock::const_iterator It = B.begin(), End = B.end();
+ // Visit PHI nodes first.
+ while (It != End && It->isPHI()) {
+ const MachineInstr *PI = &*It++;
+ InstrExec.insert(PI);
+ visitPHI(PI);
+ }
+
+ // If this block has already been visited through a flow graph edge,
+ // then the instructions have already been processed. Any updates to
+ // the cells would now only happen through visitUsesOf...
+ if (BlockScanned[Edge.second])
+ continue;
+ BlockScanned[Edge.second] = true;
+
+ // Visit non-branch instructions.
+ while (It != End && !It->isBranch()) {
+ const MachineInstr *MI = &*It++;
+ InstrExec.insert(MI);
+ visitNonBranch(MI);
+ }
+ // If block end has been reached, add the fall-through edge to the queue.
+ if (It == End) {
+ MachineFunction::const_iterator BIt = &B;
+ MachineFunction::const_iterator Next = std::next(BIt);
+ if (Next != MF.end()) {
+ int ThisN = B.getNumber();
+ int NextN = Next->getNumber();
+ FlowQ.push(CFGEdge(ThisN, NextN));
+ }
+ } else {
+ // Handle the remaining sequence of branches. This function will update
+ // the work queue.
+ visitBranchesFrom(It);
+ }
+  } // while (!FlowQ.empty())
+
+ if (Trace) {
+ dbgs() << "Cells after propagation:\n";
+ for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
+ dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
+ }
+}
+
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
new file mode 100644
index 000000000000..ed002a794d66
--- /dev/null
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -0,0 +1,449 @@
+//===--- BitTracker.h -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITTRACKER_H
+#define BITTRACKER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#include <map>
+#include <queue>
+#include <set>
+
+namespace llvm {
+ class ConstantInt;
+ class MachineRegisterInfo;
+ class MachineBasicBlock;
+ class MachineInstr;
+ class MachineOperand;
+ class raw_ostream;
+
+struct BitTracker {
+ struct BitRef;
+ struct RegisterRef;
+ struct BitValue;
+ struct BitMask;
+ struct RegisterCell;
+ struct MachineEvaluator;
+
+ typedef SetVector<const MachineBasicBlock *> BranchTargetList;
+
+ struct CellMapType : public std::map<unsigned,RegisterCell> {
+ bool has(unsigned Reg) const;
+ };
+
+ BitTracker(const MachineEvaluator &E, MachineFunction &F);
+ ~BitTracker();
+
+ void run();
+ void trace(bool On = false) { Trace = On; }
+ bool has(unsigned Reg) const;
+ const RegisterCell &lookup(unsigned Reg) const;
+ RegisterCell get(RegisterRef RR) const;
+ void put(RegisterRef RR, const RegisterCell &RC);
+ void subst(RegisterRef OldRR, RegisterRef NewRR);
+ bool reached(const MachineBasicBlock *B) const;
+
+private:
+ void visitPHI(const MachineInstr *PI);
+ void visitNonBranch(const MachineInstr *MI);
+ void visitBranchesFrom(const MachineInstr *BI);
+ void visitUsesOf(unsigned Reg);
+ void reset();
+
+ typedef std::pair<int,int> CFGEdge;
+ typedef std::set<CFGEdge> EdgeSetType;
+ typedef std::set<const MachineInstr *> InstrSetType;
+ typedef std::queue<CFGEdge> EdgeQueueType;
+
+ EdgeSetType EdgeExec; // Executable flow graph edges.
+ InstrSetType InstrExec; // Executable instructions.
+ EdgeQueueType FlowQ; // Work queue of CFG edges.
+ bool Trace; // Enable tracing for debugging.
+
+ const MachineEvaluator &ME;
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ CellMapType &Map;
+};
+
+
+// Abstraction of a reference to the bit at position Pos in register Reg.
+struct BitTracker::BitRef {
+ BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+ BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {}
+ bool operator== (const BitRef &BR) const {
+ // If Reg is 0, disregard Pos.
+ return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
+ }
+ unsigned Reg;
+ uint16_t Pos;
+};
+
+
+// Abstraction of a register reference in MachineOperand. It contains the
+// register number and the subregister index.
+struct BitTracker::RegisterRef {
+ RegisterRef(unsigned R = 0, unsigned S = 0)
+ : Reg(R), Sub(S) {}
+ RegisterRef(const MachineOperand &MO)
+ : Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+ unsigned Reg, Sub;
+};
+
+
+// Value that a single bit can take. This is outside of the context of
+// any register; it is more of an abstraction of the two-element set of
+// possible bit values. One extension here is the "Ref" type, which
+// indicates that this bit takes the same value as the bit described by
+// RefI.
+struct BitTracker::BitValue {
+ enum ValueType {
+ Top, // Bit not yet defined.
+ Zero, // Bit = 0.
+ One, // Bit = 1.
+ Ref // Bit value same as the one described in RefI.
+ // Conceptually, there is no explicit "bottom" value: the lattice's
+ // bottom will be expressed as a "ref to itself", which, in the context
+ // of registers, could be read as "this value of this bit is defined by
+ // this bit".
+ // The ordering is:
+ // x <= Top,
+ // Self <= x, where "Self" is "ref to itself".
+ // This makes the value lattice different for each virtual register
+ // (even for each bit in the same virtual register), since the "bottom"
+ // for one register will be a simple "ref" for another register.
+ // Since we do not store the "Self" bit and register number, the meet
+ // operation will need to take it as a parameter.
+ //
+    // In practice there is a special case for values that are not
+    // associated with any specific virtual register. An example would be
+    // a value corresponding to a bit of a physical register, or an
+    // intermediate value obtained in some computation (such as instruction
+    // evaluation). Such cases are identical to the usual Ref type, but the
+    // register number is 0. In that case the Pos field of the reference is
+    // ignored.
+    //
+    // It is worth noting that for a value V that is a "ref" with a non-zero
+    // RefI.Reg, the referenced register may actually be the same register
+    // that contains V. If RefI.Pos refers to the position of V itself, then
+    // V is taken to be "bottom" (a "ref to itself"); otherwise V is taken
+    // to be identical to the referenced bit of the same register.
+    // If RefI.Reg is 0, however, such a reference to the same register is
+    // not possible. Any value V that is a "ref" and whose RefI.Reg is 0 is
+    // treated as "bottom".
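+    //
+    // A small illustration (hypothetical virtual register 7, bit 2): the
+    // "bottom" for that bit is BitValue(7, 2), a ref to itself; Zero and
+    // One both lie strictly between this bottom and Top, and are mutually
+    // incomparable (their meet is "bottom").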
+ };
+ ValueType Type;
+ BitRef RefI;
+
+ BitValue(ValueType T = Top) : Type(T) {}
+ BitValue(bool B) : Type(B ? One : Zero) {}
+ BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {}
+ BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {}
+
+ bool operator== (const BitValue &V) const {
+ if (Type != V.Type)
+ return false;
+ if (Type == Ref && !(RefI == V.RefI))
+ return false;
+ return true;
+ }
+ bool operator!= (const BitValue &V) const {
+ return !operator==(V);
+ }
+ bool is(unsigned T) const {
+ assert(T == 0 || T == 1);
+ return T == 0 ? Type == Zero
+ : (T == 1 ? Type == One : false);
+ }
+
+ // The "meet" operation is the "." operation in a semilattice (L, ., T, B):
+ // (1) x.x = x
+ // (2) x.y = y.x
+ // (3) x.(y.z) = (x.y).z
+ // (4) x.T = x (i.e. T = "top")
+ // (5) x.B = B (i.e. B = "bottom")
+ //
+ // This "meet" function will update the value of the "*this" object with
+ // the newly calculated one, and return "true" if the value of *this has
+ // changed, and "false" otherwise.
+  // To prove that it satisfies the conditions (1)-(5), it is sufficient
+  // to show that the relation
+  //   x <= y  <=>  x.y = x
+  // defines a partial order (i.e. that "meet" is the same as "infimum").
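+  //
+  // A small illustration (hypothetical virtual register 5, bit position 3):
+  //   BitValue X(false);                     // X = Zero
+  //   X.meet(BitValue(true), BitRef(5, 3));  // true: X becomes Ref to (5,3),
+  //                                          // i.e. "bottom" for this bit
+  //   X.meet(BitValue(), BitRef(5, 3));      // false: bottom absorbs everything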
+ bool meet(const BitValue &V, const BitRef &Self) {
+ // First, check the cases where there is nothing to be done.
+ if (Type == Ref && RefI == Self) // Bottom.meet(V) = Bottom (i.e. This)
+ return false;
+ if (V.Type == Top) // This.meet(Top) = This
+ return false;
+ if (*this == V) // This.meet(This) = This
+ return false;
+
+ // At this point, we know that the value of "this" will change.
+ // If it is Top, it will become the same as V, otherwise it will
+ // become "bottom" (i.e. Self).
+ if (Type == Top) {
+ Type = V.Type;
+ RefI = V.RefI; // This may be irrelevant, but copy anyway.
+ return true;
+ }
+ // Become "bottom".
+ Type = Ref;
+ RefI = Self;
+ return true;
+ }
+
+ // Create a reference to the bit value V.
+ static BitValue ref(const BitValue &V);
+ // Create a "self".
+ static BitValue self(const BitRef &Self = BitRef());
+
+ bool num() const {
+ return Type == Zero || Type == One;
+ }
+ operator bool() const {
+ assert(Type == Zero || Type == One);
+ return Type == One;
+ }
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV);
+};
+
+
+// This operation must be idempotent, i.e. ref(ref(V)) == ref(V).
+inline BitTracker::BitValue
+BitTracker::BitValue::ref(const BitValue &V) {
+ if (V.Type != Ref)
+ return BitValue(V.Type);
+ if (V.RefI.Reg != 0)
+ return BitValue(V.RefI.Reg, V.RefI.Pos);
+ return self();
+}
+
+
+inline BitTracker::BitValue
+BitTracker::BitValue::self(const BitRef &Self) {
+ return BitValue(Self.Reg, Self.Pos);
+}
+
+
+// A sequence of bits starting from index B up to and including index E.
+// If E < B, the mask represents two sections: [0..E] and [B..W) where
+// W is the width of the register.
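+// For example (illustrative): in a 64-bit register, BitMask(0, 31) covers
+// bits [0..31], while BitMask(48, 15), where E < B, covers the two sections
+// [0..15] and [48..63].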
+struct BitTracker::BitMask {
+ BitMask() : B(0), E(0) {}
+ BitMask(uint16_t b, uint16_t e) : B(b), E(e) {}
+ uint16_t first() const { return B; }
+ uint16_t last() const { return E; }
+private:
+ uint16_t B, E;
+};
+
+
+// Representation of a register: a list of BitValues.
+struct BitTracker::RegisterCell {
+ RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {}
+
+ uint16_t width() const {
+ return Bits.size();
+ }
+ const BitValue &operator[](uint16_t BitN) const {
+ assert(BitN < Bits.size());
+ return Bits[BitN];
+ }
+ BitValue &operator[](uint16_t BitN) {
+ assert(BitN < Bits.size());
+ return Bits[BitN];
+ }
+
+ bool meet(const RegisterCell &RC, unsigned SelfR);
+ RegisterCell &insert(const RegisterCell &RC, const BitMask &M);
+ RegisterCell extract(const BitMask &M) const; // Returns a new cell.
+ RegisterCell &rol(uint16_t Sh); // Rotate left.
+ RegisterCell &fill(uint16_t B, uint16_t E, const BitValue &V);
+ RegisterCell &cat(const RegisterCell &RC); // Concatenate.
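+  // Count the number of consecutive leading/trailing bits equal to B.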
+ uint16_t cl(bool B) const;
+ uint16_t ct(bool B) const;
+
+ bool operator== (const RegisterCell &RC) const;
+ bool operator!= (const RegisterCell &RC) const {
+ return !operator==(RC);
+ }
+
+ const RegisterCell &operator=(const RegisterCell &RC) {
+ Bits = RC.Bits;
+ return *this;
+ }
+
+ // Generate a "ref" cell for the corresponding register. In the resulting
+ // cell each bit will be described as being the same as the corresponding
+ // bit in register Reg (i.e. the cell is "defined" by register Reg).
+ static RegisterCell self(unsigned Reg, uint16_t Width);
+ // Generate a "top" cell of given size.
+ static RegisterCell top(uint16_t Width);
+ // Generate a cell that is a "ref" to another cell.
+ static RegisterCell ref(const RegisterCell &C);
+
+private:
+ // The DefaultBitN is here only to avoid frequent reallocation of the
+ // memory in the vector.
+ static const unsigned DefaultBitN = 32;
+ typedef SmallVector<BitValue, DefaultBitN> BitValueList;
+ BitValueList Bits;
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC);
+};
+
+
+inline bool BitTracker::has(unsigned Reg) const {
+ return Map.find(Reg) != Map.end();
+}
+
+
+inline const BitTracker::RegisterCell&
+BitTracker::lookup(unsigned Reg) const {
+ CellMapType::const_iterator F = Map.find(Reg);
+ assert(F != Map.end());
+ return F->second;
+}
+
+
+inline BitTracker::RegisterCell
+BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
+ RegisterCell RC(Width);
+ for (uint16_t i = 0; i < Width; ++i)
+ RC.Bits[i] = BitValue::self(BitRef(Reg, i));
+ return RC;
+}
+
+
+inline BitTracker::RegisterCell
+BitTracker::RegisterCell::top(uint16_t Width) {
+ RegisterCell RC(Width);
+ for (uint16_t i = 0; i < Width; ++i)
+ RC.Bits[i] = BitValue(BitValue::Top);
+ return RC;
+}
+
+
+inline BitTracker::RegisterCell
+BitTracker::RegisterCell::ref(const RegisterCell &C) {
+ uint16_t W = C.width();
+ RegisterCell RC(W);
+ for (unsigned i = 0; i < W; ++i)
+ RC[i] = BitValue::ref(C[i]);
+ return RC;
+}
+
+
+inline bool BitTracker::CellMapType::has(unsigned Reg) const {
+ return find(Reg) != end();
+}
+
+// A class to evaluate the target's instructions and update the cell maps.
+// This is used internally by the bit tracker. A target that wants to
+// utilize this should implement the evaluation functions (noted below)
+// in a subclass of this class.
+struct BitTracker::MachineEvaluator {
+ MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M)
+ : TRI(T), MRI(M) {}
+ virtual ~MachineEvaluator() {}
+
+ uint16_t getRegBitWidth(const RegisterRef &RR) const;
+
+ RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const;
+ void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const;
+ // A result of any operation should use refs to the source cells, not
+ // the cells directly. This function is a convenience wrapper to quickly
+ // generate a ref for a cell corresponding to a register reference.
+ RegisterCell getRef(const RegisterRef &RR, const CellMapType &M) const {
+ RegisterCell RC = getCell(RR, M);
+ return RegisterCell::ref(RC);
+ }
+
+ // Helper functions.
+ // Check if a cell is an immediate value (i.e. all bits are either 0 or 1).
+ bool isInt(const RegisterCell &A) const;
+ // Convert cell to an immediate value.
+ uint64_t toInt(const RegisterCell &A) const;
+
+ // Generate cell from an immediate value.
+ RegisterCell eIMM(int64_t V, uint16_t W) const;
+ RegisterCell eIMM(const ConstantInt *CI) const;
+
+ // Arithmetic.
+ RegisterCell eADD(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eSUB(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eMLS(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eMLU(const RegisterCell &A1, const RegisterCell &A2) const;
+
+ // Shifts.
+ RegisterCell eASL(const RegisterCell &A1, uint16_t Sh) const;
+ RegisterCell eLSR(const RegisterCell &A1, uint16_t Sh) const;
+ RegisterCell eASR(const RegisterCell &A1, uint16_t Sh) const;
+
+ // Logical.
+ RegisterCell eAND(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eORL(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eXOR(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eNOT(const RegisterCell &A1) const;
+
+ // Set bit, clear bit.
+ RegisterCell eSET(const RegisterCell &A1, uint16_t BitN) const;
+ RegisterCell eCLR(const RegisterCell &A1, uint16_t BitN) const;
+
+ // Count leading/trailing bits (zeros/ones).
+ RegisterCell eCLB(const RegisterCell &A1, bool B, uint16_t W) const;
+ RegisterCell eCTB(const RegisterCell &A1, bool B, uint16_t W) const;
+
+ // Sign/zero extension.
+ RegisterCell eSXT(const RegisterCell &A1, uint16_t FromN) const;
+ RegisterCell eZXT(const RegisterCell &A1, uint16_t FromN) const;
+
+ // Extract/insert
+ // XTR R,b,e: extract bits from A1 starting at bit b, ending at e-1.
+ // INS R,S,b: take R and replace bits starting from b with S.
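+  // For example (illustrative): eXTR(A, 8, 16) yields the 8-bit cell made
+  // of bits A[8..15], and eINS(R, S, 8) returns R with S written over its
+  // bits starting at bit 8.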
+ RegisterCell eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const;
+ RegisterCell eINS(const RegisterCell &A1, const RegisterCell &A2,
+ uint16_t AtN) const;
+
+ // User-provided functions for individual targets:
+
+ // Return a sub-register mask that indicates which bits in Reg belong
+ // to the subregister Sub. These bits are assumed to be contiguous in
+ // the super-register, and have the same ordering in the sub-register
+  // as in the super-register. It is valid to call this function with
+  // Sub == 0; in that case the function should return a mask that spans
+  // the entire register Reg (which is what the default implementation
+  // does).
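+  // For example (illustrative widths): for a 64-bit register pair with
+  // 32-bit halves, an implementation might return BitMask(0, 31) for the
+  // low subregister and BitMask(32, 63) for the high one.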
+ virtual BitMask mask(unsigned Reg, unsigned Sub) const;
+ // Indicate whether a given register class should be tracked.
+ virtual bool track(const TargetRegisterClass *RC) const { return true; }
+ // Evaluate a non-branching machine instruction, given the cell map with
+ // the input values. Place the results in the Outputs map. Return "true"
+ // if evaluation succeeded, "false" otherwise.
+ virtual bool evaluate(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const;
+ // Evaluate a branch, given the cell map with the input values. Fill out
+ // a list of all possible branch targets and indicate (through a flag)
+ // whether the branch could fall-through. Return "true" if this information
+ // has been successfully computed, "false" otherwise.
+ virtual bool evaluate(const MachineInstr *BI, const CellMapType &Inputs,
+ BranchTargetList &Targets, bool &FallsThru) const = 0;
+
+ const TargetRegisterInfo &TRI;
+ MachineRegisterInfo &MRI;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 758ccc741007..7ab2f0ba01df 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -12,13 +12,19 @@ tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(HexagonCommonTableGen)
add_llvm_target(HexagonCodeGen
+ BitTracker.cpp
HexagonAsmPrinter.cpp
+ HexagonBitTracker.cpp
HexagonCFGOptimizer.cpp
+ HexagonCommonGEP.cpp
HexagonCopyToCombine.cpp
HexagonExpandCondsets.cpp
HexagonExpandPredSpillCode.cpp
HexagonFixupHwLoops.cpp
HexagonFrameLowering.cpp
+ HexagonGenExtract.cpp
+ HexagonGenInsert.cpp
+ HexagonGenPredicate.cpp
HexagonHardwareLoops.cpp
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
new file mode 100644
index 000000000000..021e58a1d08a
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -0,0 +1,1174 @@
+//===--- HexagonBitTracker.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+using namespace llvm;
+
+typedef BitTracker BT;
+
+HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
+ MachineRegisterInfo &mri,
+ const HexagonInstrInfo &tii,
+ MachineFunction &mf)
+ : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) {
+  // Populate the VRX map (VR to extension-type).
+  // Go over all the formal parameters of the function. If a given parameter
+  // P is sign- or zero-extended, locate the virtual register holding that
+  // parameter and create an entry in the VRX map indicating the type of
+  // extension (and the source type).
+  // This is a bit complicated to do accurately, since the memory layout
+  // information is necessary to precisely determine whether an aggregate
+  // parameter will be passed in a register or in memory. What is given in
+  // MRI is the association between the physical register that is live-in
+  // (i.e. holds an argument) and the virtual register that this value will
+  // be copied into. This, by itself, is not sufficient to map the virtual
+  // register back to a formal parameter of Function (since consecutive
+  // live-ins from MRI may not correspond to consecutive formal parameters
+  // from Function). To avoid the complications with in-memory arguments,
+  // only consider the initial sequence of formal parameters that are known
+  // to be passed via registers.
+ unsigned AttrIdx = 0;
+ unsigned InVirtReg, InPhysReg = 0;
+ const Function &F = *MF.getFunction();
+ typedef Function::const_arg_iterator arg_iterator;
+ for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+ AttrIdx++;
+ const Argument &Arg = *I;
+ Type *ATy = Arg.getType();
+ unsigned Width = 0;
+ if (ATy->isIntegerTy())
+ Width = ATy->getIntegerBitWidth();
+ else if (ATy->isPointerTy())
+ Width = 32;
+ // If pointer size is not set through target data, it will default to
+ // Module::AnyPointerSize.
+ if (Width == 0 || Width > 64)
+ break;
+ InPhysReg = getNextPhysReg(InPhysReg, Width);
+ if (!InPhysReg)
+ break;
+ InVirtReg = getVirtRegFor(InPhysReg);
+ if (!InVirtReg)
+ continue;
+ AttributeSet Attrs = F.getAttributes();
+ if (Attrs.hasAttribute(AttrIdx, Attribute::SExt))
+ VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width)));
+ else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt))
+ VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width)));
+ }
+}
+
+
+BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
+ if (Sub == 0)
+ return MachineEvaluator::mask(Reg, 0);
+ using namespace Hexagon;
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ unsigned ID = RC->getID();
+ uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
+ switch (ID) {
+ case DoubleRegsRegClassID:
+ return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
+ : BT::BitMask(RW, 2*RW-1);
+ default:
+ break;
+ }
+#ifndef NDEBUG
+ dbgs() << PrintReg(Reg, &TRI, Sub) << '\n';
+#endif
+ llvm_unreachable("Unexpected register/subregister");
+}
+
+
+namespace {
+ struct RegisterRefs : public std::vector<BT::RegisterRef> {
+ typedef std::vector<BT::RegisterRef> Base;
+ RegisterRefs(const MachineInstr *MI);
+ const BT::RegisterRef &operator[](unsigned n) const {
+      // The main purpose of this operator is to assert on a bad argument.
+ assert(n < size());
+ return Base::operator[](n);
+ }
+ };
+
+ RegisterRefs::RegisterRefs(const MachineInstr *MI)
+ : Base(MI->getNumOperands()) {
+ for (unsigned i = 0, n = size(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg())
+ at(i) = BT::RegisterRef(MO);
+      // For indices that don't correspond to registers, the entry remains
+      // default-constructed.
+ }
+ }
+}
+
+
+bool HexagonEvaluator::evaluate(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ unsigned NumDefs = 0;
+
+ // Sanity verification: there should not be any defs with subregisters.
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ NumDefs++;
+ assert(MO.getSubReg() == 0);
+ }
+
+ if (NumDefs == 0)
+ return false;
+
+ if (MI->mayLoad())
+ return evaluateLoad(MI, Inputs, Outputs);
+
+ // Check COPY instructions that copy formal parameters into virtual
+ // registers. Such parameters can be sign- or zero-extended at the
+ // call site, and we should take advantage of this knowledge. The MRI
+ // keeps a list of pairs of live-in physical and virtual registers,
+ // which provides information about which virtual registers will hold
+ // the argument values. The function will still contain instructions
+ // defining those virtual registers, and in practice those are COPY
+ // instructions from a physical to a virtual register. In such cases,
+ // applying the argument extension to the virtual register can be seen
+ // as simply mirroring the extension that had already been applied to
+ // the physical register at the call site. If the defining instruction
+ // was not a COPY, it would not be clear how to mirror that extension
+ // on the callee's side. For that reason, only check COPY instructions
+ // for potential extensions.
+ if (MI->isCopy()) {
+ if (evaluateFormalCopy(MI, Inputs, Outputs))
+ return true;
+ }
+
+  // Beyond this point, if any operand is a global, skip that instruction.
+  // The reason is that certain instructions that can take an immediate
+  // operand can also have a global symbol in that operand. Rather than
+  // checking, for each instruction individually, what kind of operand it
+  // has, do it here. Global symbols used as operands generally do not
+  // provide any useful information.
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() ||
+ MO.isCPI())
+ return false;
+ }
+
+ RegisterRefs Reg(MI);
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+ #define op(i) MI->getOperand(i)
+ #define rc(i) RegisterCell::ref(getCell(Reg[i],Inputs))
+ #define im(i) MI->getOperand(i).getImm()
+
+ // If the instruction has no register operands, skip it.
+ if (Reg.size() == 0)
+ return false;
+
+ // Record result for register in operand 0.
+ auto rr0 = [this,Reg] (const BT::RegisterCell &Val, CellMapType &Outputs)
+ -> bool {
+ putCell(Reg[0], Val, Outputs);
+ return true;
+ };
+ // Get the cell corresponding to the N-th operand.
+ auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W)
+ -> BT::RegisterCell {
+ const MachineOperand &Op = MI->getOperand(N);
+ if (Op.isImm())
+ return eIMM(Op.getImm(), W);
+ if (!Op.isReg())
+ return RegisterCell::self(0, W);
+ assert(getRegBitWidth(Reg[N]) == W && "Register width mismatch");
+ return rc(N);
+ };
+ // Extract RW low bits of the cell.
+ auto lo = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ assert(RW <= RC.width());
+ return eXTR(RC, 0, RW);
+ };
+ // Extract RW high bits of the cell.
+ auto hi = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ uint16_t W = RC.width();
+ assert(RW <= W);
+ return eXTR(RC, W-RW, W);
+ };
+ // Extract N-th halfword (counting from the least significant position).
+ auto half = [this] (const BT::RegisterCell &RC, unsigned N)
+ -> BT::RegisterCell {
+ assert(N*16+16 <= RC.width());
+ return eXTR(RC, N*16, N*16+16);
+ };
+ // Shuffle bits (pick even/odd from cells and merge into result).
+ auto shuffle = [this] (const BT::RegisterCell &Rs, const BT::RegisterCell &Rt,
+ uint16_t BW, bool Odd) -> BT::RegisterCell {
+ uint16_t I = Odd, Ws = Rs.width();
+ assert(Ws == Rt.width());
+ RegisterCell RC = eXTR(Rt, I*BW, I*BW+BW).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ while (I*BW < Ws) {
+ RC.cat(eXTR(Rt, I*BW, I*BW+BW)).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ }
+ return RC;
+ };
+
+ // The bitwidth of the 0th operand. In most (if not all) of the
+ // instructions below, the 0th operand is the defined register.
+  // Pre-compute the bitwidth here, because it is needed in many of the
+  // cases below.
+ uint16_t W0 = (Reg[0].Reg != 0) ? getRegBitWidth(Reg[0]) : 0;
+
+ switch (Opc) {
+ // Transfer immediate:
+
+ case A2_tfrsi:
+ case A2_tfrpi:
+ case CONST32:
+ case CONST32_Float_Real:
+ case CONST32_Int_Real:
+ case CONST64_Float_Real:
+ case CONST64_Int_Real:
+ return rr0(eIMM(im(1), W0), Outputs);
+ case TFR_PdFalse:
+ return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs);
+ case TFR_PdTrue:
+ return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs);
+ case TFR_FI: {
+ int FI = op(1).getIndex();
+ int Off = op(2).getImm();
+ unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off);
+ unsigned L = Log2_32(A);
+ RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
+ RC.fill(0, L, BT::BitValue::Zero);
+ return rr0(RC, Outputs);
+ }
+
+ // Transfer register:
+
+ case A2_tfr:
+ case A2_tfrp:
+ case C2_pxfer_map:
+ return rr0(rc(1), Outputs);
+ case C2_tfrpr: {
+ uint16_t RW = W0;
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ assert(PW <= RW);
+ RegisterCell PC = eXTR(rc(1), 0, PW);
+ RegisterCell RC = RegisterCell(RW).insert(PC, BT::BitMask(0, PW-1));
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ return rr0(RC, Outputs);
+ }
+ case C2_tfrrp: {
+ RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
+ W0 = 8; // XXX Pred size
+ return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs);
+ }
+
+ // Arithmetic:
+
+ case A2_abs:
+ case A2_absp:
+ // TODO
+ break;
+
+ case A2_addsp: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ assert(W0 == 64 && W1 == 32);
+ RegisterCell CW = RegisterCell(W0).insert(rc(1), BT::BitMask(0, W1-1));
+ RegisterCell RC = eADD(eSXT(CW, W1), rc(2));
+ return rr0(RC, Outputs);
+ }
+ case A2_add:
+ case A2_addp:
+ return rr0(eADD(rc(1), rc(2)), Outputs);
+ case A2_addi:
+ return rr0(eADD(rc(1), eIMM(im(2), W0)), Outputs);
+ case S4_addi_asl_ri: {
+ RegisterCell RC = eADD(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_addi_lsr_ri: {
+ RegisterCell RC = eADD(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_addaddi: {
+ RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyri_addi: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyrr_addi: {
+ RegisterCell M = eMLS(rc(2), rc(3));
+ RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyri_addr_u2: {
+ RegisterCell M = eMLS(eIMM(im(2), W0), rc(3));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyri_addr: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyrr_addr: {
+ RegisterCell M = eMLS(rc(2), rc(3));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case S4_subaddi: {
+ RegisterCell RC = eADD(rc(1), eSUB(eIMM(im(2), W0), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ case M2_accii: {
+ RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M2_acci: {
+ RegisterCell RC = eADD(rc(1), eADD(rc(2), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ case M2_subacc: {
+ RegisterCell RC = eADD(rc(1), eSUB(rc(2), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ case S2_addasl_rrri: {
+ RegisterCell RC = eADD(rc(1), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case C4_addipc: {
+ RegisterCell RPC = RegisterCell::self(Reg[0].Reg, W0);
+ RPC.fill(0, 2, BT::BitValue::Zero);
+ return rr0(eADD(RPC, eIMM(im(2), W0)), Outputs);
+ }
+ case A2_sub:
+ case A2_subp:
+ return rr0(eSUB(rc(1), rc(2)), Outputs);
+ case A2_subri:
+ return rr0(eSUB(eIMM(im(1), W0), rc(2)), Outputs);
+ case S4_subi_asl_ri: {
+ RegisterCell RC = eSUB(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_subi_lsr_ri: {
+ RegisterCell RC = eSUB(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case M2_naccii: {
+ RegisterCell RC = eSUB(rc(1), eADD(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M2_nacci: {
+ RegisterCell RC = eSUB(rc(1), eADD(rc(2), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ // 32-bit negation is done by "Rd = A2_subri 0, Rs"
+ case A2_negp:
+ return rr0(eSUB(eIMM(0, W0), rc(1)), Outputs);
+
+ case M2_mpy_up: {
+ RegisterCell M = eMLS(rc(1), rc(2));
+ return rr0(hi(M, W0), Outputs);
+ }
+ case M2_dpmpyss_s0:
+ return rr0(eMLS(rc(1), rc(2)), Outputs);
+ case M2_dpmpyss_acc_s0:
+ return rr0(eADD(rc(1), eMLS(rc(2), rc(3))), Outputs);
+ case M2_dpmpyss_nac_s0:
+ return rr0(eSUB(rc(1), eMLS(rc(2), rc(3))), Outputs);
+ case M2_mpyi: {
+ RegisterCell M = eMLS(rc(1), rc(2));
+ return rr0(lo(M, W0), Outputs);
+ }
+ case M2_macsip: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M2_macsin: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eSUB(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M2_maci: {
+ RegisterCell M = eMLS(rc(2), rc(3));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M2_mpysmi: {
+ RegisterCell M = eMLS(rc(1), eIMM(im(2), W0));
+ return rr0(lo(M, 32), Outputs);
+ }
+ case M2_mpysin: {
+ RegisterCell M = eMLS(rc(1), eIMM(-im(2), W0));
+ return rr0(lo(M, 32), Outputs);
+ }
+ case M2_mpysip: {
+ RegisterCell M = eMLS(rc(1), eIMM(im(2), W0));
+ return rr0(lo(M, 32), Outputs);
+ }
+ case M2_mpyu_up: {
+ RegisterCell M = eMLU(rc(1), rc(2));
+ return rr0(hi(M, W0), Outputs);
+ }
+ case M2_dpmpyuu_s0:
+ return rr0(eMLU(rc(1), rc(2)), Outputs);
+ case M2_dpmpyuu_acc_s0:
+ return rr0(eADD(rc(1), eMLU(rc(2), rc(3))), Outputs);
+ case M2_dpmpyuu_nac_s0:
+ return rr0(eSUB(rc(1), eMLU(rc(2), rc(3))), Outputs);
+ //case M2_mpysu_up:
+
+ // Logical/bitwise:
+
+ case A2_andir:
+ return rr0(eAND(rc(1), eIMM(im(2), W0)), Outputs);
+ case A2_and:
+ case A2_andp:
+ return rr0(eAND(rc(1), rc(2)), Outputs);
+ case A4_andn:
+ case A4_andnp:
+ return rr0(eAND(rc(1), eNOT(rc(2))), Outputs);
+ case S4_andi_asl_ri: {
+ RegisterCell RC = eAND(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_andi_lsr_ri: {
+ RegisterCell RC = eAND(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case M4_and_and:
+ return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case M4_and_andn:
+ return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case M4_and_or:
+ return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case M4_and_xor:
+ return rr0(eAND(rc(1), eXOR(rc(2), rc(3))), Outputs);
+ case A2_orir:
+ return rr0(eORL(rc(1), eIMM(im(2), W0)), Outputs);
+ case A2_or:
+ case A2_orp:
+ return rr0(eORL(rc(1), rc(2)), Outputs);
+ case A4_orn:
+ case A4_ornp:
+ return rr0(eORL(rc(1), eNOT(rc(2))), Outputs);
+ case S4_ori_asl_ri: {
+ RegisterCell RC = eORL(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_ori_lsr_ri: {
+ RegisterCell RC = eORL(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case M4_or_and:
+ return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case M4_or_andn:
+ return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case S4_or_andi:
+ case S4_or_andix: {
+ RegisterCell RC = eORL(rc(1), eAND(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case S4_or_ori: {
+ RegisterCell RC = eORL(rc(1), eORL(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M4_or_or:
+ return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case M4_or_xor:
+ return rr0(eORL(rc(1), eXOR(rc(2), rc(3))), Outputs);
+ case A2_xor:
+ case A2_xorp:
+ return rr0(eXOR(rc(1), rc(2)), Outputs);
+ case M4_xor_and:
+ return rr0(eXOR(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case M4_xor_andn:
+ return rr0(eXOR(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case M4_xor_or:
+ return rr0(eXOR(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case M4_xor_xacc:
+ return rr0(eXOR(rc(1), eXOR(rc(2), rc(3))), Outputs);
+ case A2_not:
+ case A2_notp:
+ return rr0(eNOT(rc(1)), Outputs);
+
+ case S2_asl_i_r:
+ case S2_asl_i_p:
+ return rr0(eASL(rc(1), im(2)), Outputs);
+ case A2_aslh:
+ return rr0(eASL(rc(1), 16), Outputs);
+ case S2_asl_i_r_acc:
+ case S2_asl_i_p_acc:
+ return rr0(eADD(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_nac:
+ case S2_asl_i_p_nac:
+ return rr0(eSUB(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_and:
+ case S2_asl_i_p_and:
+ return rr0(eAND(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_or:
+ case S2_asl_i_p_or:
+ return rr0(eORL(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_xacc:
+ case S2_asl_i_p_xacc:
+ return rr0(eXOR(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_vh:
+ case S2_asl_i_vw:
+ // TODO
+ break;
+
+ case S2_asr_i_r:
+ case S2_asr_i_p:
+ return rr0(eASR(rc(1), im(2)), Outputs);
+ case A2_asrh:
+ return rr0(eASR(rc(1), 16), Outputs);
+ case S2_asr_i_r_acc:
+ case S2_asr_i_p_acc:
+ return rr0(eADD(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_nac:
+ case S2_asr_i_p_nac:
+ return rr0(eSUB(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_and:
+ case S2_asr_i_p_and:
+ return rr0(eAND(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_or:
+ case S2_asr_i_p_or:
+ return rr0(eORL(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_rnd: {
+ // The input is first sign-extended to 64 bits, then the output
+ // is truncated back to 32 bits.
+ assert(W0 == 32);
+ RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0);
+ RegisterCell RC = eASR(eADD(eASR(XC, im(2)), eIMM(1, 2*W0)), 1);
+ return rr0(eXTR(RC, 0, W0), Outputs);
+ }
+ case S2_asr_i_r_rnd_goodsyntax: {
+ int64_t S = im(2);
+ if (S == 0)
+ return rr0(rc(1), Outputs);
+ // Result: S2_asr_i_r_rnd Rs, u5-1
+ RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0);
+ RegisterCell RC = eLSR(eADD(eASR(XC, S-1), eIMM(1, 2*W0)), 1);
+ return rr0(eXTR(RC, 0, W0), Outputs);
+ }
+ case S2_asr_r_vh:
+ case S2_asr_i_vw:
+ case S2_asr_i_svw_trun:
+ // TODO
+ break;
+
+ case S2_lsr_i_r:
+ case S2_lsr_i_p:
+ return rr0(eLSR(rc(1), im(2)), Outputs);
+ case S2_lsr_i_r_acc:
+ case S2_lsr_i_p_acc:
+ return rr0(eADD(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_nac:
+ case S2_lsr_i_p_nac:
+ return rr0(eSUB(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_and:
+ case S2_lsr_i_p_and:
+ return rr0(eAND(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_or:
+ case S2_lsr_i_p_or:
+ return rr0(eORL(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_xacc:
+ case S2_lsr_i_p_xacc:
+ return rr0(eXOR(rc(1), eLSR(rc(2), im(3))), Outputs);
+
+ case S2_clrbit_i: {
+ RegisterCell RC = rc(1);
+ RC[im(2)] = BT::BitValue::Zero;
+ return rr0(RC, Outputs);
+ }
+ case S2_setbit_i: {
+ RegisterCell RC = rc(1);
+ RC[im(2)] = BT::BitValue::One;
+ return rr0(RC, Outputs);
+ }
+ case S2_togglebit_i: {
+ RegisterCell RC = rc(1);
+ uint16_t BX = im(2);
+ RC[BX] = RC[BX].is(0) ? BT::BitValue::One
+ : RC[BX].is(1) ? BT::BitValue::Zero
+ : BT::BitValue::self();
+ return rr0(RC, Outputs);
+ }
+
+ case A4_bitspliti: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ uint16_t BX = im(2);
+ // Res.uw[1] = Rs[bx+1:], Res.uw[0] = Rs[0:bx]
+ const BT::BitValue Zero = BT::BitValue::Zero;
+ RegisterCell RZ = RegisterCell(W0).fill(BX, W1, Zero)
+ .fill(W1+(W1-BX), W0, Zero);
+ RegisterCell BF1 = eXTR(rc(1), 0, BX), BF2 = eXTR(rc(1), BX, W1);
+ RegisterCell RC = eINS(eINS(RZ, BF1, 0), BF2, W1);
+ return rr0(RC, Outputs);
+ }
+ case S4_extract:
+ case S4_extractp:
+ case S2_extractu:
+ case S2_extractup: {
+ uint16_t Wd = im(2), Of = im(3);
+ assert(Wd <= W0);
+ if (Wd == 0)
+ return rr0(eIMM(0, W0), Outputs);
+ // If the width extends beyond the register size, pad the register
+ // with 0 bits.
+ RegisterCell Pad = (Wd+Of > W0) ? rc(1).cat(eIMM(0, Wd+Of-W0)) : rc(1);
+ RegisterCell Ext = eXTR(Pad, Of, Wd+Of);
+ // Ext is short, need to extend it with 0s or sign bit.
+ RegisterCell RC = RegisterCell(W0).insert(Ext, BT::BitMask(0, Wd-1));
+ if (Opc == S2_extractu || Opc == S2_extractup)
+ return rr0(eZXT(RC, Wd), Outputs);
+ return rr0(eSXT(RC, Wd), Outputs);
+ }
+ case S2_insert:
+ case S2_insertp: {
+ uint16_t Wd = im(3), Of = im(4);
+ assert(Wd < W0 && Of < W0);
+ // If Wd+Of exceeds W0, the inserted bits are truncated.
+ if (Wd+Of > W0)
+ Wd = W0-Of;
+ if (Wd == 0)
+ return rr0(rc(1), Outputs);
+ return rr0(eINS(rc(1), eXTR(rc(2), 0, Wd), Of), Outputs);
+ }
+
+ // Bit permutations:
+
+ case A2_combineii:
+ case A4_combineii:
+ case A4_combineir:
+ case A4_combineri:
+ case A2_combinew:
+ assert(W0 % 2 == 0);
+ return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs);
+ case A2_combine_ll:
+ case A2_combine_lh:
+ case A2_combine_hl:
+ case A2_combine_hh: {
+ assert(W0 == 32);
+ assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32);
+ // Low half in the output is 0 for _ll and _hl, 1 otherwise:
+ unsigned LoH = !(Opc == A2_combine_ll || Opc == A2_combine_hl);
+ // High half in the output is 0 for _ll and _lh, 1 otherwise:
+ unsigned HiH = !(Opc == A2_combine_ll || Opc == A2_combine_lh);
+ RegisterCell R1 = rc(1);
+ RegisterCell R2 = rc(2);
+ RegisterCell RC = half(R2, LoH).cat(half(R1, HiH));
+ return rr0(RC, Outputs);
+ }
+ case S2_packhl: {
+ assert(W0 == 64);
+ assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32);
+ RegisterCell R1 = rc(1);
+ RegisterCell R2 = rc(2);
+ RegisterCell RC = half(R2, 0).cat(half(R1, 0)).cat(half(R2, 1))
+ .cat(half(R1, 1));
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffeb: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 8, false);
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffeh: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 16, false);
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffob: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 8, true);
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffoh: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 16, true);
+ return rr0(RC, Outputs);
+ }
+ case C2_mask: {
+ uint16_t WR = W0;
+ uint16_t WP = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ assert(WR == 64 && WP == 8);
+ RegisterCell R1 = rc(1);
+ RegisterCell RC(WR);
+ for (uint16_t i = 0; i < WP; ++i) {
+ const BT::BitValue &V = R1[i];
+ BT::BitValue F = (V.is(0) || V.is(1)) ? V : BT::BitValue::self();
+ RC.fill(i*8, i*8+8, F);
+ }
+ return rr0(RC, Outputs);
+ }
+
+ // Mux:
+
+ case C2_muxii:
+ case C2_muxir:
+ case C2_muxri:
+ case C2_mux: {
+ BT::BitValue PC0 = rc(1)[0];
+ RegisterCell R2 = cop(2, W0);
+ RegisterCell R3 = cop(3, W0);
+ if (PC0.is(0) || PC0.is(1))
+ return rr0(RegisterCell::ref(PC0 ? R2 : R3), Outputs);
+ R2.meet(R3, Reg[0].Reg);
+ return rr0(R2, Outputs);
+ }
+ case C2_vmux:
+ // TODO
+ break;
+
+ // Sign- and zero-extension:
+
+ case A2_sxtb:
+ return rr0(eSXT(rc(1), 8), Outputs);
+ case A2_sxth:
+ return rr0(eSXT(rc(1), 16), Outputs);
+ case A2_sxtw: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ assert(W0 == 64 && W1 == 32);
+ RegisterCell RC = eSXT(rc(1).cat(eIMM(0, W1)), W1);
+ return rr0(RC, Outputs);
+ }
+ case A2_zxtb:
+ return rr0(eZXT(rc(1), 8), Outputs);
+ case A2_zxth:
+ return rr0(eZXT(rc(1), 16), Outputs);
+
+ // Bit count:
+
+ case S2_cl0:
+ case S2_cl0p:
+ // Always produce a 32-bit result.
+ return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs);
+ case S2_cl1:
+ case S2_cl1p:
+ return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs);
+ case S2_clb:
+ case S2_clbp: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ RegisterCell R1 = rc(1);
+ BT::BitValue TV = R1[W1-1];
+ if (TV.is(0) || TV.is(1))
+ return rr0(eCLB(R1, TV, 32), Outputs);
+ break;
+ }
+ case S2_ct0:
+ case S2_ct0p:
+ return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs);
+ case S2_ct1:
+ case S2_ct1p:
+ return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs);
+ case S5_popcountp:
+ // TODO
+ break;
+
+ case C2_all8: {
+ RegisterCell P1 = rc(1);
+ bool Has0 = false, All1 = true;
+ for (uint16_t i = 0; i < 8/*XXX*/; ++i) {
+ if (!P1[i].is(1))
+ All1 = false;
+ if (!P1[i].is(0))
+ continue;
+ Has0 = true;
+ break;
+ }
+ if (!Has0 && !All1)
+ break;
+ RegisterCell RC(W0);
+ RC.fill(0, W0, (All1 ? BT::BitValue::One : BT::BitValue::Zero));
+ return rr0(RC, Outputs);
+ }
+ case C2_any8: {
+ RegisterCell P1 = rc(1);
+ bool Has1 = false, All0 = true;
+ for (uint16_t i = 0; i < 8/*XXX*/; ++i) {
+ if (!P1[i].is(0))
+ All0 = false;
+ if (!P1[i].is(1))
+ continue;
+ Has1 = true;
+ break;
+ }
+ if (!Has1 && !All0)
+ break;
+ RegisterCell RC(W0);
+ RC.fill(0, W0, (Has1 ? BT::BitValue::One : BT::BitValue::Zero));
+ return rr0(RC, Outputs);
+ }
+ case C2_and:
+ return rr0(eAND(rc(1), rc(2)), Outputs);
+ case C2_andn:
+ return rr0(eAND(rc(1), eNOT(rc(2))), Outputs);
+ case C2_not:
+ return rr0(eNOT(rc(1)), Outputs);
+ case C2_or:
+ return rr0(eORL(rc(1), rc(2)), Outputs);
+ case C2_orn:
+ return rr0(eORL(rc(1), eNOT(rc(2))), Outputs);
+ case C2_xor:
+ return rr0(eXOR(rc(1), rc(2)), Outputs);
+ case C4_and_and:
+ return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case C4_and_andn:
+ return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case C4_and_or:
+ return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case C4_and_orn:
+ return rr0(eAND(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs);
+ case C4_or_and:
+ return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case C4_or_andn:
+ return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case C4_or_or:
+ return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case C4_or_orn:
+ return rr0(eORL(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs);
+ case C2_bitsclr:
+ case C2_bitsclri:
+ case C2_bitsset:
+ case C4_nbitsclr:
+ case C4_nbitsclri:
+ case C4_nbitsset:
+ // TODO
+ break;
+ case S2_tstbit_i:
+ case S4_ntstbit_i: {
+ BT::BitValue V = rc(1)[im(2)];
+ if (V.is(0) || V.is(1)) {
+ // If instruction is S2_tstbit_i, test for 1, otherwise test for 0.
+ bool TV = (Opc == S2_tstbit_i);
+ BT::BitValue F = V.is(TV) ? BT::BitValue::One : BT::BitValue::Zero;
+ return rr0(RegisterCell(W0).fill(0, W0, F), Outputs);
+ }
+ break;
+ }
+
+ default:
+ return MachineEvaluator::evaluate(MI, Inputs, Outputs);
+ }
+ #undef im
+ #undef rc
+ #undef op
+ return false;
+}
+
+
+bool HexagonEvaluator::evaluate(const MachineInstr *BI,
+ const CellMapType &Inputs, BranchTargetList &Targets,
+ bool &FallsThru) const {
+ // We need to evaluate one branch at a time. TII::AnalyzeBranch checks
+ // all the branches in a basic block at once, so we cannot use it.
+ unsigned Opc = BI->getOpcode();
+ bool SimpleBranch = false;
+ bool Negated = false;
+ switch (Opc) {
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ Negated = true;
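+      // Fall through to the non-negated jump cases below.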
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ // Simple branch: if([!]Pn) jump ...
+ // i.e. Op0 = predicate, Op1 = branch target.
+ SimpleBranch = true;
+ break;
+ case Hexagon::J2_jump:
+ Targets.insert(BI->getOperand(0).getMBB());
+ FallsThru = false;
+ return true;
+ default:
+ // If the branch is of unknown type, assume that all successors are
+ // executable.
+ return false;
+ }
+
+ if (!SimpleBranch)
+ return false;
+
+ // BI is a conditional branch if we got here.
+ RegisterRef PR = BI->getOperand(0);
+ RegisterCell PC = getCell(PR, Inputs);
+ const BT::BitValue &Test = PC[0];
+
+ // If the condition is neither true nor false, then it's unknown.
+ if (!Test.is(0) && !Test.is(1))
+ return false;
+
+ // "Test.is(!Negated)" means "branch condition is true".
+ if (!Test.is(!Negated)) {
+ // Condition known to be false.
+ FallsThru = true;
+ return true;
+ }
+
+ Targets.insert(BI->getOperand(1).getMBB());
+ FallsThru = false;
+ return true;
+}
+
+
+bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ if (TII.isPredicated(MI))
+ return false;
+ assert(MI->mayLoad() && "A load that mayn't?");
+ unsigned Opc = MI->getOpcode();
+
+ uint16_t BitNum;
+ bool SignEx;
+ using namespace Hexagon;
+
+ switch (Opc) {
+ default:
+ return false;
+
+#if 0
+ // memb_fifo
+ case L2_loadalignb_pbr:
+ case L2_loadalignb_pcr:
+ case L2_loadalignb_pi:
+ // memh_fifo
+ case L2_loadalignh_pbr:
+ case L2_loadalignh_pcr:
+ case L2_loadalignh_pi:
+ // membh
+ case L2_loadbsw2_pbr:
+ case L2_loadbsw2_pci:
+ case L2_loadbsw2_pcr:
+ case L2_loadbsw2_pi:
+ case L2_loadbsw4_pbr:
+ case L2_loadbsw4_pci:
+ case L2_loadbsw4_pcr:
+ case L2_loadbsw4_pi:
+ // memubh
+ case L2_loadbzw2_pbr:
+ case L2_loadbzw2_pci:
+ case L2_loadbzw2_pcr:
+ case L2_loadbzw2_pi:
+ case L2_loadbzw4_pbr:
+ case L2_loadbzw4_pci:
+ case L2_loadbzw4_pcr:
+ case L2_loadbzw4_pi:
+#endif
+
+ case L2_loadrbgp:
+ case L2_loadrb_io:
+ case L2_loadrb_pbr:
+ case L2_loadrb_pci:
+ case L2_loadrb_pcr:
+ case L2_loadrb_pi:
+ case L4_loadrb_abs:
+ case L4_loadrb_ap:
+ case L4_loadrb_rr:
+ case L4_loadrb_ur:
+ BitNum = 8;
+ SignEx = true;
+ break;
+
+ case L2_loadrubgp:
+ case L2_loadrub_io:
+ case L2_loadrub_pbr:
+ case L2_loadrub_pci:
+ case L2_loadrub_pcr:
+ case L2_loadrub_pi:
+ case L4_loadrub_abs:
+ case L4_loadrub_ap:
+ case L4_loadrub_rr:
+ case L4_loadrub_ur:
+ BitNum = 8;
+ SignEx = false;
+ break;
+
+ case L2_loadrhgp:
+ case L2_loadrh_io:
+ case L2_loadrh_pbr:
+ case L2_loadrh_pci:
+ case L2_loadrh_pcr:
+ case L2_loadrh_pi:
+ case L4_loadrh_abs:
+ case L4_loadrh_ap:
+ case L4_loadrh_rr:
+ case L4_loadrh_ur:
+ BitNum = 16;
+ SignEx = true;
+ break;
+
+ case L2_loadruhgp:
+ case L2_loadruh_io:
+ case L2_loadruh_pbr:
+ case L2_loadruh_pci:
+ case L2_loadruh_pcr:
+ case L2_loadruh_pi:
+ case L4_loadruh_rr:
+ case L4_loadruh_abs:
+ case L4_loadruh_ap:
+ case L4_loadruh_ur:
+ BitNum = 16;
+ SignEx = false;
+ break;
+
+ case L2_loadrigp:
+ case L2_loadri_io:
+ case L2_loadri_pbr:
+ case L2_loadri_pci:
+ case L2_loadri_pcr:
+ case L2_loadri_pi:
+ case L2_loadw_locked:
+ case L4_loadri_abs:
+ case L4_loadri_ap:
+ case L4_loadri_rr:
+ case L4_loadri_ur:
+ case LDriw_pred:
+ BitNum = 32;
+ SignEx = true;
+ break;
+
+ case L2_loadrdgp:
+ case L2_loadrd_io:
+ case L2_loadrd_pbr:
+ case L2_loadrd_pci:
+ case L2_loadrd_pcr:
+ case L2_loadrd_pi:
+ case L4_loadd_locked:
+ case L4_loadrd_abs:
+ case L4_loadrd_ap:
+ case L4_loadrd_rr:
+ case L4_loadrd_ur:
+ BitNum = 64;
+ SignEx = true;
+ break;
+ }
+
+ const MachineOperand &MD = MI->getOperand(0);
+ assert(MD.isReg() && MD.isDef());
+ RegisterRef RD = MD;
+
+ uint16_t W = getRegBitWidth(RD);
+ assert(W >= BitNum && BitNum > 0);
+ RegisterCell Res(W);
+
+ for (uint16_t i = 0; i < BitNum; ++i)
+ Res[i] = BT::BitValue::self(BT::BitRef(RD.Reg, i));
+
+ if (SignEx) {
+ const BT::BitValue &Sign = Res[BitNum-1];
+ for (uint16_t i = BitNum; i < W; ++i)
+ Res[i] = BT::BitValue::ref(Sign);
+ } else {
+ for (uint16_t i = BitNum; i < W; ++i)
+ Res[i] = BT::BitValue::Zero;
+ }
+
+ putCell(RD, Res, Outputs);
+ return true;
+}
+
+
+bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ // If MI defines a formal parameter, but is not a copy (loads are handled
+ // in evaluateLoad), then it's not clear what to do.
+ assert(MI->isCopy());
+
+ RegisterRef RD = MI->getOperand(0);
+ RegisterRef RS = MI->getOperand(1);
+ assert(RD.Sub == 0);
+ if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg))
+ return false;
+ RegExtMap::const_iterator F = VRX.find(RD.Reg);
+ if (F == VRX.end())
+ return false;
+
+ uint16_t EW = F->second.Width;
+ // Store RD's cell into the map. This will associate the cell with a virtual
+  // register, and make zero-/sign-extends possible (otherwise we would be
+  // extending "self" bit values, which would have no effect, since "self"
+  // values cannot be references to anything).
+ putCell(RD, getCell(RS, Inputs), Outputs);
+
+ RegisterCell Res;
+ // Read RD's cell from the outputs instead of RS's cell from the inputs:
+ if (F->second.Type == ExtType::SExt)
+ Res = eSXT(getCell(RD, Outputs), EW);
+ else if (F->second.Type == ExtType::ZExt)
+ Res = eZXT(getCell(RD, Outputs), EW);
+
+ putCell(RD, Res, Outputs);
+ return true;
+}
+
+
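+// For illustration (values follow directly from the Phys32/Phys64 tables
+// below): getNextPhysReg(0, 32) returns R0, getNextPhysReg(R0, 32) returns
+// R1, getNextPhysReg(R1, 64) returns D1, and getNextPhysReg(D0, 32)
+// returns R2.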
+unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
+ using namespace Hexagon;
+ bool Is64 = DoubleRegsRegClass.contains(PReg);
+ assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));
+
+ static const unsigned Phys32[] = { R0, R1, R2, R3, R4, R5 };
+ static const unsigned Phys64[] = { D0, D1, D2 };
+ const unsigned Num32 = sizeof(Phys32)/sizeof(unsigned);
+ const unsigned Num64 = sizeof(Phys64)/sizeof(unsigned);
+
+ // Return the first parameter register of the required width.
+ if (PReg == 0)
+ return (Width <= 32) ? Phys32[0] : Phys64[0];
+
+ // Set Idx32, Idx64 in such a way that Idx+1 would give the index of the
+ // next register.
+ unsigned Idx32 = 0, Idx64 = 0;
+ if (!Is64) {
+ while (Idx32 < Num32) {
+ if (Phys32[Idx32] == PReg)
+ break;
+ Idx32++;
+ }
+ Idx64 = Idx32/2;
+ } else {
+ while (Idx64 < Num64) {
+ if (Phys64[Idx64] == PReg)
+ break;
+ Idx64++;
+ }
+ Idx32 = Idx64*2+1;
+ }
+
+ if (Width <= 32)
+ return (Idx32+1 < Num32) ? Phys32[Idx32+1] : 0;
+ return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
+}
+
+
+unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
+ typedef MachineRegisterInfo::livein_iterator iterator;
+ for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
+ if (I->first == PReg)
+ return I->second;
+ }
+ return 0;
+}
diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h
new file mode 100644
index 000000000000..897af2d71870
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonBitTracker.h
@@ -0,0 +1,64 @@
+//===--- HexagonBitTracker.h ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONBITTRACKER_H
+#define HEXAGONBITTRACKER_H
+
+#include "BitTracker.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+ class HexagonInstrInfo;
+ class HexagonRegisterInfo;
+
+struct HexagonEvaluator : public BitTracker::MachineEvaluator {
+ typedef BitTracker::CellMapType CellMapType;
+ typedef BitTracker::RegisterRef RegisterRef;
+ typedef BitTracker::RegisterCell RegisterCell;
+ typedef BitTracker::BranchTargetList BranchTargetList;
+
+ HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri,
+ const HexagonInstrInfo &tii, MachineFunction &mf);
+
+ bool evaluate(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const override;
+ bool evaluate(const MachineInstr *BI, const CellMapType &Inputs,
+ BranchTargetList &Targets, bool &FallsThru) const override;
+
+ BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override;
+
+ MachineFunction &MF;
+ MachineFrameInfo &MFI;
+ const HexagonInstrInfo &TII;
+
+private:
+ bool evaluateLoad(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const;
+ bool evaluateFormalCopy(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const;
+
+ unsigned getNextPhysReg(unsigned PReg, unsigned Width) const;
+ unsigned getVirtRegFor(unsigned PReg) const;
+
+ // Type of formal parameter extension.
+ struct ExtType {
+ enum { SExt, ZExt };
+ char Type;
+ uint16_t Width;
+ ExtType() : Type(0), Width(0) {}
+ ExtType(char t, uint16_t w) : Type(t), Width(w) {}
+ };
+ // Map VR -> extension type.
+ typedef DenseMap<unsigned, ExtType> RegExtMap;
+ RegExtMap VRX;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
new file mode 100644
index 000000000000..9f5fac156527
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -0,0 +1,1325 @@
+//===--- HexagonCommonGEP.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
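+// This pass rebuilds chains of getelementptr instructions so that a prefix
+// shared by several GEPs is computed only once. A rough illustration, using
+// a hypothetical %struct.S whose field 1 is [4 x i32] (the rebuilt GEPs are
+// emitted under the name "cgep"):
+//
+//   %a = getelementptr %struct.S, %struct.S* %p, i32 0, i32 1, i32 2
+//   %b = getelementptr %struct.S, %struct.S* %p, i32 0, i32 1, i32 3
+//
+// becomes, conceptually:
+//
+//   %cgep  = getelementptr %struct.S, %struct.S* %p, i32 0, i32 1
+//   %cgep1 = getelementptr [4 x i32], [4 x i32]* %cgep, i32 0, i32 2
+//   %cgep2 = getelementptr [4 x i32], [4 x i32]* %cgep, i32 0, i32 3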
+#define DEBUG_TYPE "commgep"
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true),
+ cl::Hidden, cl::ZeroOrMore);
+
+static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden,
+ cl::ZeroOrMore);
+
+static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true),
+ cl::Hidden, cl::ZeroOrMore);
+
+namespace llvm {
+ void initializeHexagonCommonGEPPass(PassRegistry&);
+}
+
+namespace {
+ struct GepNode;
+ typedef std::set<GepNode*> NodeSet;
+ typedef std::map<GepNode*,Value*> NodeToValueMap;
+ typedef std::vector<GepNode*> NodeVect;
+ typedef std::map<GepNode*,NodeVect> NodeChildrenMap;
+ typedef std::set<Use*> UseSet;
+ typedef std::map<GepNode*,UseSet> NodeToUsesMap;
+
+ // Numbering map for GEP nodes, used to keep track of the order in which
+ // the nodes were created.
+ struct NodeNumbering : public std::map<const GepNode*,unsigned> {
+ };
+
+ struct NodeOrdering : public NodeNumbering {
+ NodeOrdering() : LastNum(0) {}
+#ifdef _MSC_VER
+ void special_insert_for_special_msvc(const GepNode *N)
+#else
+ using NodeNumbering::insert;
+ void insert(const GepNode* N)
+#endif
+ {
+ insert(std::make_pair(N, ++LastNum));
+ }
+ bool operator() (const GepNode* N1, const GepNode *N2) const {
+ const_iterator F1 = find(N1), F2 = find(N2);
+ assert(F1 != end() && F2 != end());
+ return F1->second < F2->second;
+ }
+ private:
+ unsigned LastNum;
+ };
+
+
+ class HexagonCommonGEP : public FunctionPass {
+ public:
+ static char ID;
+ HexagonCommonGEP() : FunctionPass(ID) {
+ initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnFunction(Function &F);
+ virtual const char *getPassName() const {
+ return "Hexagon Common GEP";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addPreserved<PostDominatorTree>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef std::map<Value*,GepNode*> ValueToNodeMap;
+ typedef std::vector<Value*> ValueVect;
+ typedef std::map<GepNode*,ValueVect> NodeToValuesMap;
+
+ void getBlockTraversalOrder(BasicBlock *Root, ValueVect &Order);
+ bool isHandledGepForm(GetElementPtrInst *GepI);
+ void processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM);
+ void collect();
+ void common();
+
+ BasicBlock *recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ BasicBlock *recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ bool isInvariantIn(Value *Val, Loop *L);
+ bool isInvariantIn(GepNode *Node, Loop *L);
+ bool isInMainPath(BasicBlock *B, Loop *L);
+ BasicBlock *adjustForInvariance(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ void separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc);
+ void separateConstantChains(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ void computeNodePlacement(NodeToValueMap &Loc);
+
+ Value *fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
+ BasicBlock *LocB);
+ void getAllUsersForNode(GepNode *Node, ValueVect &Values,
+ NodeChildrenMap &NCM);
+ void materialize(NodeToValueMap &Loc);
+
+ void removeDeadCode();
+
+ NodeVect Nodes;
+ NodeToUsesMap Uses;
+ NodeOrdering NodeOrder; // Node ordering, for deterministic behavior.
+ SpecificBumpPtrAllocator<GepNode> *Mem;
+ LLVMContext *Ctx;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ PostDominatorTree *PDT;
+ Function *Fn;
+ };
+}
+
+
+char HexagonCommonGEP::ID = 0;
+INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP",
+ false, false)
+
+namespace {
+ struct GepNode {
+ enum {
+ None = 0,
+ Root = 0x01,
+ Internal = 0x02,
+ Used = 0x04
+ };
+
+ uint32_t Flags;
+ union {
+ GepNode *Parent;
+ Value *BaseVal;
+ };
+ Value *Idx;
+ Type *PTy; // Type of the pointer operand.
+
+ GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {}
+ GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) {
+ if (Flags & Root)
+ BaseVal = N->BaseVal;
+ else
+ Parent = N->Parent;
+ }
+ friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN);
+ };
+
+
+ Type *next_type(Type *Ty, Value *Idx) {
+ // Advance the type.
+ if (!Ty->isStructTy()) {
+ Type *NexTy = cast<SequentialType>(Ty)->getElementType();
+ return NexTy;
+ }
+ // Otherwise it is a struct type.
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
+ assert(CI && "Struct type with non-constant index");
+ int64_t i = CI->getValue().getSExtValue();
+ Type *NextTy = cast<StructType>(Ty)->getElementType(i);
+ return NextTy;
+ }
+
+
+ raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) {
+ OS << "{ {";
+ bool Comma = false;
+ if (GN.Flags & GepNode::Root) {
+ OS << "root";
+ Comma = true;
+ }
+ if (GN.Flags & GepNode::Internal) {
+ if (Comma)
+ OS << ',';
+ OS << "internal";
+ Comma = true;
+ }
+ if (GN.Flags & GepNode::Used) {
+ if (Comma)
+ OS << ',';
+ OS << "used";
+ Comma = true;
+ }
+ OS << "} ";
+ if (GN.Flags & GepNode::Root)
+ OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')';
+ else
+ OS << "Parent:" << GN.Parent;
+
+ OS << " Idx:";
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GN.Idx))
+ OS << CI->getValue().getSExtValue();
+ else if (GN.Idx->hasName())
+ OS << GN.Idx->getName();
+ else
+ OS << "<anon> =" << *GN.Idx;
+
+ OS << " PTy:";
+ if (GN.PTy->isStructTy()) {
+ StructType *STy = cast<StructType>(GN.PTy);
+ if (!STy->isLiteral())
+ OS << GN.PTy->getStructName();
+ else
+ OS << "<anon-struct>:" << *STy;
+ }
+ else
+ OS << *GN.PTy;
+ OS << " }";
+ return OS;
+ }
+
+
+ template <typename NodeContainer>
+ void dump_node_container(raw_ostream &OS, const NodeContainer &S) {
+ typedef typename NodeContainer::const_iterator const_iterator;
+ for (const_iterator I = S.begin(), E = S.end(); I != E; ++I)
+ OS << *I << ' ' << **I << '\n';
+ }
+
+ raw_ostream &operator<< (raw_ostream &OS,
+ const NodeVect &S) LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) {
+ dump_node_container(OS, S);
+ return OS;
+ }
+
+
+ raw_ostream &operator<< (raw_ostream &OS,
+ const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
+ typedef NodeToUsesMap::const_iterator const_iterator;
+ for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const UseSet &Us = I->second;
+ OS << I->first << " -> #" << Us.size() << '{';
+ for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) {
+ User *R = (*J)->getUser();
+ if (R->hasName())
+ OS << ' ' << R->getName();
+ else
+ OS << " <?>(" << *R << ')';
+ }
+ OS << " }\n";
+ }
+ return OS;
+ }
+
+
+ struct in_set {
+ in_set(const NodeSet &S) : NS(S) {}
+ bool operator() (GepNode *N) const {
+ return NS.find(N) != NS.end();
+ }
+ private:
+ const NodeSet &NS;
+ };
+}
+
+
+inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) {
+ return A.Allocate();
+}
+
+
+void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root,
+ ValueVect &Order) {
+ // Compute block ordering for a typical DT-based traversal of the flow
+ // graph: "before visiting a block, all of its dominators must have been
+ // visited".
+
+ Order.push_back(Root);
+ DomTreeNode *DTN = DT->getNode(Root);
+ typedef GraphTraits<DomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType Iter;
+ for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
+ getBlockTraversalOrder((*I)->getBlock(), Order);
+}
+
+
+bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
+ // No vector GEPs.
+ if (!GepI->getType()->isPointerTy())
+ return false;
+ // No GEPs without any indices. (Is this possible?)
+ if (GepI->idx_begin() == GepI->idx_end())
+ return false;
+ return true;
+}
+
+
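+// Illustrative note: a multi-index GEP is split into a chain of single-index
+// nodes. E.g. for a hypothetical
+//   %a = getelementptr %struct.S, %struct.S* %p, i32 0, i32 1, i32 2
+// (with %p not itself produced by a handled GEP), the chain is a Root node
+// (BaseVal = %p, Idx = 0) followed by Internal nodes with Idx = 1 and
+// Idx = 2; only the last node is marked Used and records the non-GEP uses
+// of %a.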
+void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
+ ValueToNodeMap &NM) {
+ DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
+ GepNode *N = new (*Mem) GepNode;
+ Value *PtrOp = GepI->getPointerOperand();
+ ValueToNodeMap::iterator F = NM.find(PtrOp);
+ if (F == NM.end()) {
+ N->BaseVal = PtrOp;
+ N->Flags |= GepNode::Root;
+ } else {
+ // If PtrOp was a GEP instruction, it must have already been processed.
+ // The ValueToNodeMap entry for it is the last gep node in the generated
+ // chain. Link to it here.
+ N->Parent = F->second;
+ }
+ N->PTy = PtrOp->getType();
+ N->Idx = *GepI->idx_begin();
+
+ // Collect the list of users of this GEP instruction. Will add it to the
+ // last node created for it.
+ UseSet Us;
+ for (Value::user_iterator UI = GepI->user_begin(), UE = GepI->user_end();
+ UI != UE; ++UI) {
+ // Check if this gep is used by anything other than other geps that
+ // we will process.
+ if (isa<GetElementPtrInst>(*UI)) {
+ GetElementPtrInst *UserG = cast<GetElementPtrInst>(*UI);
+ if (isHandledGepForm(UserG))
+ continue;
+ }
+ Us.insert(&UI.getUse());
+ }
+ Nodes.push_back(N);
+#ifdef _MSC_VER
+ NodeOrder.special_insert_for_special_msvc(N);
+#else
+ NodeOrder.insert(N);
+#endif
+
+ // Skip the first index operand, since we only handle 0. This dereferences
+ // the pointer operand.
+ GepNode *PN = N;
+ Type *PtrTy = cast<PointerType>(PtrOp->getType())->getElementType();
+ for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ GepNode *Nx = new (*Mem) GepNode;
+ Nx->Parent = PN; // Link Nx to the previous node.
+ Nx->Flags |= GepNode::Internal;
+ Nx->PTy = PtrTy;
+ Nx->Idx = Op;
+ Nodes.push_back(Nx);
+#ifdef _MSC_VER
+ NodeOrder.special_insert_for_special_msvc(Nx);
+#else
+ NodeOrder.insert(Nx);
+#endif
+ PN = Nx;
+
+ PtrTy = next_type(PtrTy, Op);
+ }
+
+ // After last node has been created, update the use information.
+ if (!Us.empty()) {
+ PN->Flags |= GepNode::Used;
+ Uses[PN].insert(Us.begin(), Us.end());
+ }
+
+ // Link the last node with the originating GEP instruction. This is to
+ // help with linking chained GEP instructions.
+ NM.insert(std::make_pair(GepI, PN));
+}
+
+
+void HexagonCommonGEP::collect() {
+ // Establish depth-first traversal order of the dominator tree.
+ ValueVect BO;
+ getBlockTraversalOrder(Fn->begin(), BO);
+
+ // The creation of gep nodes requires DT-traversal. When processing a GEP
+ // instruction that uses another GEP instruction as the base pointer, the
+ // gep node for the base pointer should already exist.
+ ValueToNodeMap NM;
+ for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) {
+ BasicBlock *B = cast<BasicBlock>(*I);
+ for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) {
+ if (!isa<GetElementPtrInst>(J))
+ continue;
+ GetElementPtrInst *GepI = cast<GetElementPtrInst>(J);
+ if (isHandledGepForm(GepI))
+ processGepInst(GepI, NM);
+ }
+ }
+
+ DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes);
+}
+
+
+namespace {
+ void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM,
+ NodeVect &Roots) {
+ typedef NodeVect::const_iterator const_iterator;
+ for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ if (N->Flags & GepNode::Root) {
+ Roots.push_back(N);
+ continue;
+ }
+ GepNode *PN = N->Parent;
+ NCM[PN].push_back(N);
+ }
+ }
+
+ void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) {
+ NodeVect Work;
+ Work.push_back(Root);
+ Nodes.insert(Root);
+
+ while (!Work.empty()) {
+ NodeVect::iterator First = Work.begin();
+ GepNode *N = *First;
+ Work.erase(First);
+ NodeChildrenMap::iterator CF = NCM.find(N);
+ if (CF != NCM.end()) {
+ Work.insert(Work.end(), CF->second.begin(), CF->second.end());
+ Nodes.insert(CF->second.begin(), CF->second.end());
+ }
+ }
+ }
+}
+
+
+namespace {
+ typedef std::set<NodeSet> NodeSymRel;
+ typedef std::pair<GepNode*,GepNode*> NodePair;
+ typedef std::set<NodePair> NodePairSet;
+
+ const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) {
+ for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I)
+ if (I->count(N))
+ return &*I;
+ return 0;
+ }
+
+ // Create an ordered pair of GepNode pointers. The pair will be used in
+ // determining equality. The only purpose of the ordering is to eliminate
+ // duplication due to the commutativity of equality/non-equality.
+ NodePair node_pair(GepNode *N1, GepNode *N2) {
+ uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2);
+ if (P1 <= P2)
+ return std::make_pair(N1, N2);
+ return std::make_pair(N2, N1);
+ }
+
+ unsigned node_hash(GepNode *N) {
+ // Include everything except flags and parent.
+ FoldingSetNodeID ID;
+ ID.AddPointer(N->Idx);
+ ID.AddPointer(N->PTy);
+ return ID.ComputeHash();
+ }
+
+ bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) {
+ // Don't cache the result for nodes with different hashes. The hash
+ // comparison is fast enough.
+ if (node_hash(N1) != node_hash(N2))
+ return false;
+
+ NodePair NP = node_pair(N1, N2);
+ NodePairSet::iterator FEq = Eq.find(NP);
+ if (FEq != Eq.end())
+ return true;
+ NodePairSet::iterator FNe = Ne.find(NP);
+ if (FNe != Ne.end())
+ return false;
+ // Not previously compared.
+ bool Root1 = N1->Flags & GepNode::Root;
+ bool Root2 = N2->Flags & GepNode::Root;
+ NodePair P = node_pair(N1, N2);
+ // If the Root flag has different values, the nodes are different.
+ // If both nodes are root nodes, but their base pointers differ,
+ // they are different.
+ if (Root1 != Root2 || (Root1 && N1->BaseVal != N2->BaseVal)) {
+ Ne.insert(P);
+ return false;
+ }
+ // Here the root flags are identical, and for root nodes the
+ // base pointers are equal, so the root nodes are equal.
+ // For non-root nodes, compare their parent nodes.
+ if (Root1 || node_eq(N1->Parent, N2->Parent, Eq, Ne)) {
+ Eq.insert(P);
+ return true;
+ }
+ return false;
+ }
+}
+
+
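+// Note: node_eq treats two chains as equal when they have the same base
+// value and the same sequence of index values and pointer types, regardless
+// of where the original GEP instructions appear. E.g. two occurrences of a
+// hypothetical
+//   getelementptr %struct.S, %struct.S* %p, i32 0, i32 1
+// in different blocks produce node chains that fall into one equivalence
+// class and are commoned below.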
+void HexagonCommonGEP::common() {
+ // The essence of this commoning is finding gep nodes that are equal.
+ // To do this we need to compare all pairs of nodes. To save time,
+ // first, partition the set of all nodes into sets of potentially equal
+ // nodes, and then compare pairs from within each partition.
+ typedef std::map<unsigned,NodeSet> NodeSetMap;
+ NodeSetMap MaybeEq;
+
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ unsigned H = node_hash(N);
+ MaybeEq[H].insert(N);
+ }
+
+ // Compute the equivalence relation for the gep nodes. Use two caches,
+ // one for equality and the other for non-equality.
+ NodeSymRel EqRel; // Equality relation (as set of equivalence classes).
+ NodePairSet Eq, Ne; // Caches.
+ for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end();
+ I != E; ++I) {
+ NodeSet &S = I->second;
+ for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) {
+ GepNode *N = *NI;
+ // If node already has a class, then the class must have been created
+ // in a prior iteration of this loop. Since equality is transitive,
+ // nothing more will be added to that class, so skip it.
+ if (node_class(N, EqRel))
+ continue;
+
+ // Create a new class candidate now.
+ NodeSet C;
+ for (NodeSet::iterator NJ = std::next(NI); NJ != NE; ++NJ)
+ if (node_eq(N, *NJ, Eq, Ne))
+ C.insert(*NJ);
+ // If C is empty, N would be the only element in its class. Don't bother
+ // creating a class for it then.
+ if (!C.empty()) {
+ C.insert(N); // Finalize the set before adding it to the relation.
+ std::pair<NodeSymRel::iterator, bool> Ins = EqRel.insert(C);
+ (void)Ins;
+ assert(Ins.second && "Cannot add a class");
+ }
+ }
+ }
+
+ DEBUG({
+ dbgs() << "Gep node equality:\n";
+ for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I)
+ dbgs() << "{ " << I->first << ", " << I->second << " }\n";
+
+ dbgs() << "Gep equivalence classes:\n";
+ for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
+ dbgs() << '{';
+ const NodeSet &S = *I;
+ for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) {
+ if (J != S.begin())
+ dbgs() << ',';
+ dbgs() << ' ' << *J;
+ }
+ dbgs() << " }\n";
+ }
+ });
+
+
+ // Create a projection from a NodeSet to the minimal element in it.
+ typedef std::map<const NodeSet*,GepNode*> ProjMap;
+ ProjMap PM;
+ for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
+ const NodeSet &S = *I;
+ GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder);
+ std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min));
+ (void)Ins;
+ assert(Ins.second && "Cannot add minimal element");
+
+ // Update the min element's flags, and user list.
+ uint32_t Flags = 0;
+ UseSet &MinUs = Uses[Min];
+ for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) {
+ GepNode *N = *J;
+ uint32_t NF = N->Flags;
+ // If N is used, append all original values of N to the list of
+ // original values of Min.
+ if (NF & GepNode::Used)
+ MinUs.insert(Uses[N].begin(), Uses[N].end());
+ Flags |= NF;
+ }
+ if (MinUs.empty())
+ Uses.erase(Min);
+
+ // The collected flags should include all the flags from the min element.
+ assert((Min->Flags & Flags) == Min->Flags);
+ Min->Flags = Flags;
+ }
+
+ // Commoning: for each non-root gep node, replace "Parent" with the
+ // selected (minimum) node from the corresponding equivalence class.
+ // If a given parent does not have an equivalence class, leave it
+ // unchanged (it means that it's the only element in its class).
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ if (N->Flags & GepNode::Root)
+ continue;
+ const NodeSet *PC = node_class(N->Parent, EqRel);
+ if (!PC)
+ continue;
+ ProjMap::iterator F = PM.find(PC);
+ if (F == PM.end())
+ continue;
+ // Found a replacement, use it.
+ GepNode *Rep = F->second;
+ N->Parent = Rep;
+ }
+
+ DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes);
+
+ // Finally, erase the nodes that are no longer used.
+ NodeSet Erase;
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ const NodeSet *PC = node_class(N, EqRel);
+ if (!PC)
+ continue;
+ ProjMap::iterator F = PM.find(PC);
+ if (F == PM.end())
+ continue;
+ if (N == F->second)
+ continue;
+ // Node for removal.
+ Erase.insert(*I);
+ }
+ NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(),
+ in_set(Erase));
+ Nodes.resize(std::distance(Nodes.begin(), NewE));
+
+ DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes);
+}
+
+
+namespace {
+ template <typename T>
+ BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) {
+ DEBUG({
+ dbgs() << "NCD of {";
+ for (typename T::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ if (!*I)
+ continue;
+ BasicBlock *B = cast<BasicBlock>(*I);
+ dbgs() << ' ' << B->getName();
+ }
+ dbgs() << " }\n";
+ });
+
+ // Allow null basic blocks in Blocks. In such cases, return 0.
+ typename T::iterator I = Blocks.begin(), E = Blocks.end();
+ if (I == E || !*I)
+ return 0;
+ BasicBlock *Dom = cast<BasicBlock>(*I);
+ while (++I != E) {
+ BasicBlock *B = cast_or_null<BasicBlock>(*I);
+ Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0;
+ if (!Dom)
+ return 0;
+ }
+ DEBUG(dbgs() << "computed:" << Dom->getName() << '\n');
+ return Dom;
+ }
+
+ template <typename T>
+ BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) {
+ // If two blocks, A and B, dominate a block C, then A dominates B,
+ // or B dominates A.
+ typename T::iterator I = Blocks.begin(), E = Blocks.end();
+ // Find the first non-null block.
+ while (I != E && !*I)
+ ++I;
+ if (I == E)
+ return DT->getRoot();
+ BasicBlock *DomB = cast<BasicBlock>(*I);
+ while (++I != E) {
+ if (!*I)
+ continue;
+ BasicBlock *B = cast<BasicBlock>(*I);
+ if (DT->dominates(B, DomB))
+ continue;
+ if (!DT->dominates(DomB, B))
+ return 0;
+ DomB = B;
+ }
+ return DomB;
+ }
+
+ // Find the first use in B of any value from Values. If no such use,
+ // return B->end().
+ template <typename T>
+ BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) {
+ BasicBlock::iterator FirstUse = B->end(), BEnd = B->end();
+ typedef typename T::iterator iterator;
+ for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) {
+ Value *V = *I;
+ // If V is used in a PHI node, the use belongs to the incoming block,
+ // not the block with the PHI node. In the incoming block, the use
+ // would be considered as being at the end of it, so it cannot
+ // influence the position of the first use (which is assumed to be
+ // at the end to start with).
+ if (isa<PHINode>(V))
+ continue;
+ if (!isa<Instruction>(V))
+ continue;
+ Instruction *In = cast<Instruction>(V);
+ if (In->getParent() != B)
+ continue;
+ BasicBlock::iterator It = In;
+ if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd))
+ FirstUse = It;
+ }
+ return FirstUse;
+ }
+
+ bool is_empty(const BasicBlock *B) {
+ return B->empty() || (&*B->begin() == B->getTerminator());
+ }
+}
+
+
+BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ DEBUG(dbgs() << "Loc for node:" << Node << '\n');
+ // Recalculate the placement for Node, assuming that the locations of
+ // its children in Loc are valid.
+ // Return 0 if there is no valid placement for Node (for example, it
+ // uses an index value that is not available at the location required
+ // to dominate all children, etc.).
+
+ // Find the nearest common dominator for:
+ // - all users, if the node is used, and
+ // - all children.
+ ValueVect Bs;
+ if (Node->Flags & GepNode::Used) {
+ // Append all blocks with uses of the original values to the
+ // block vector Bs.
+ NodeToUsesMap::iterator UF = Uses.find(Node);
+ assert(UF != Uses.end() && "Used node with no use information");
+ UseSet &Us = UF->second;
+ for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) {
+ Use *U = *I;
+ User *R = U->getUser();
+ if (!isa<Instruction>(R))
+ continue;
+ BasicBlock *PB = isa<PHINode>(R)
+ ? cast<PHINode>(R)->getIncomingBlock(*U)
+ : cast<Instruction>(R)->getParent();
+ Bs.push_back(PB);
+ }
+ }
+ // Append the location of each child.
+ NodeChildrenMap::iterator CF = NCM.find(Node);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) {
+ GepNode *CN = *I;
+ NodeToValueMap::iterator LF = Loc.find(CN);
+ // If the child is only used in GEP instructions (i.e. is not used in
+ // non-GEP instructions), the nearest dominator computed for it may have
+ // been null. In such a case it won't have a location available.
+ if (LF == Loc.end())
+ continue;
+ Bs.push_back(LF->second);
+ }
+ }
+
+ BasicBlock *DomB = nearest_common_dominator(DT, Bs);
+ if (!DomB)
+ return 0;
+ // Check if the index used by Node dominates the computed dominator.
+ Instruction *IdxI = dyn_cast<Instruction>(Node->Idx);
+ if (IdxI && !DT->dominates(IdxI->getParent(), DomB))
+ return 0;
+
+ // Avoid putting nodes into empty blocks.
+ while (is_empty(DomB)) {
+ DomTreeNode *N = (*DT)[DomB]->getIDom();
+ if (!N)
+ break;
+ DomB = N->getBlock();
+ }
+
+ // Otherwise, DomB is fine. Update the location map.
+ Loc[Node] = DomB;
+ return DomB;
+}
+
+
+BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n');
+ // Recalculate the placement of Node, after recursively recalculating the
+ // placements of all its children.
+ NodeChildrenMap::iterator CF = NCM.find(Node);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I)
+ recalculatePlacementRec(*I, NCM, Loc);
+ }
+ BasicBlock *LB = recalculatePlacement(Node, NCM, Loc);
+ DEBUG(dbgs() << "LocRec end for node:" << Node << '\n');
+ return LB;
+}
+
+
+bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) {
+ if (isa<Constant>(Val) || isa<Argument>(Val))
+ return true;
+ Instruction *In = dyn_cast<Instruction>(Val);
+ if (!In)
+ return false;
+ BasicBlock *HdrB = L->getHeader(), *DefB = In->getParent();
+ return DT->properlyDominates(DefB, HdrB);
+}
+
+
+bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) {
+ if (Node->Flags & GepNode::Root)
+ if (!isInvariantIn(Node->BaseVal, L))
+ return false;
+ return isInvariantIn(Node->Idx, L);
+}
+
+
+bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) {
+ BasicBlock *HB = L->getHeader();
+ BasicBlock *LB = L->getLoopLatch();
+ // B must post-dominate the loop header or dominate the loop latch.
+ if (PDT->dominates(B, HB))
+ return true;
+ if (LB && DT->dominates(B, LB))
+ return true;
+ return false;
+}
+
+
+namespace {
+ BasicBlock *preheader(DominatorTree *DT, Loop *L) {
+ if (BasicBlock *PH = L->getLoopPreheader())
+ return PH;
+ if (!OptSpeculate)
+ return 0;
+ DomTreeNode *DN = DT->getNode(L->getHeader());
+ if (!DN)
+ return 0;
+ return DN->getIDom()->getBlock();
+ }
+}
+
+
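+// Illustrative note: e.g. a node computing &A[i], where both A and i are
+// defined before a loop and the node is executed on every iteration, is
+// hoisted to the loop's preheader (or, with -commgep-speculate, to the
+// header's immediate dominator), and then further out of enclosing loops
+// while it stays invariant.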
+BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ // Find the "topmost" location for Node: it must be dominated by both,
+ // its parent (or the BaseVal, if it's a root node), and by the index
+ // value.
+ ValueVect Bs;
+ if (Node->Flags & GepNode::Root) {
+ if (Instruction *PIn = dyn_cast<Instruction>(Node->BaseVal))
+ Bs.push_back(PIn->getParent());
+ } else {
+ Bs.push_back(Loc[Node->Parent]);
+ }
+ if (Instruction *IIn = dyn_cast<Instruction>(Node->Idx))
+ Bs.push_back(IIn->getParent());
+ BasicBlock *TopB = nearest_common_dominatee(DT, Bs);
+
+ // Traverse the loop nest upwards until we find a loop in which Node
+ // is no longer invariant, or until we get to the upper limit of Node's
+ // placement. The traversal will also stop when a suitable "preheader"
+ // cannot be found for a given loop. The "preheader" may actually be
+ // a regular block outside of the loop (i.e. not guarded), in which case
+ // the Node will be speculated.
+ // For nodes that are not in the main path of the containing loop (i.e.
+ // are not executed in each iteration), do not move them out of the loop.
+ BasicBlock *LocB = cast_or_null<BasicBlock>(Loc[Node]);
+ if (LocB) {
+ Loop *Lp = LI->getLoopFor(LocB);
+ while (Lp) {
+ if (!isInvariantIn(Node, Lp) || !isInMainPath(LocB, Lp))
+ break;
+ BasicBlock *NewLoc = preheader(DT, Lp);
+ if (!NewLoc || !DT->dominates(TopB, NewLoc))
+ break;
+ Lp = Lp->getParentLoop();
+ LocB = NewLoc;
+ }
+ }
+ Loc[Node] = LocB;
+
+ // Recursively compute the locations of all children nodes.
+ NodeChildrenMap::iterator CF = NCM.find(Node);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I)
+ adjustForInvariance(*I, NCM, Loc);
+ }
+ return LocB;
+}
+
+
+namespace {
+ struct LocationAsBlock {
+ LocationAsBlock(const NodeToValueMap &L) : Map(L) {}
+ const NodeToValueMap &Map;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS,
+ const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ;
+ raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) {
+ for (NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end();
+ I != E; ++I) {
+ OS << I->first << " -> ";
+ BasicBlock *B = cast<BasicBlock>(I->second);
+ OS << B->getName() << '(' << B << ')';
+ OS << '\n';
+ }
+ return OS;
+ }
+
+ inline bool is_constant(GepNode *N) {
+ return isa<ConstantInt>(N->Idx);
+ }
+}
+
+
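+// Illustrative note: for a used node with a constant index, the chain of
+// constant-index nodes ending at it (up to the first root or non-constant
+// ancestor) is replicated next to each load/store user, so that the constant
+// offset can later fold into the memory instruction's addressing mode (e.g.
+// a Hexagon "memw(r0+#8)" access instead of a separate address computation).
+// The replica takes over the uses belonging to that user; the original chain
+// keeps its remaining uses.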
+void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
+ NodeToValueMap &Loc) {
+ User *R = U->getUser();
+ DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: "
+ << *R << '\n');
+ BasicBlock *PB = cast<Instruction>(R)->getParent();
+
+ GepNode *N = Node;
+ GepNode *C = 0, *NewNode = 0;
+ while (is_constant(N) && !(N->Flags & GepNode::Root)) {
+ // XXX if (single-use) dont-replicate;
+ GepNode *NewN = new (*Mem) GepNode(N);
+ Nodes.push_back(NewN);
+ Loc[NewN] = PB;
+
+ if (N == Node)
+ NewNode = NewN;
+ NewN->Flags &= ~GepNode::Used;
+ if (C)
+ C->Parent = NewN;
+ C = NewN;
+ N = N->Parent;
+ }
+ if (!NewNode)
+ return;
+
+ // Move over all uses that share the same user as U from Node to NewNode.
+ NodeToUsesMap::iterator UF = Uses.find(Node);
+ assert(UF != Uses.end());
+ UseSet &Us = UF->second;
+ UseSet NewUs;
+ for (UseSet::iterator I = Us.begin(); I != Us.end(); ) {
+ User *S = (*I)->getUser();
+ UseSet::iterator Nx = std::next(I);
+ if (S == R) {
+ NewUs.insert(*I);
+ Us.erase(I);
+ }
+ I = Nx;
+ }
+ if (Us.empty()) {
+ Node->Flags &= ~GepNode::Used;
+ Uses.erase(UF);
+ }
+
+ // Should at least have U in NewUs.
+ NewNode->Flags |= GepNode::Used;
+ DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n');
+ assert(!NewUs.empty());
+ Uses[NewNode] = NewUs;
+}
+
+
+void HexagonCommonGEP::separateConstantChains(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ // First approximation: extract all chains.
+ NodeSet Ns;
+ nodes_for_root(Node, NCM, Ns);
+
+ DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n');
+ // Collect all used nodes together with the uses from loads and stores,
+ // where the GEP node could be folded into the load/store instruction.
+ NodeToUsesMap FNs; // Foldable nodes.
+ for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) {
+ GepNode *N = *I;
+ if (!(N->Flags & GepNode::Used))
+ continue;
+ NodeToUsesMap::iterator UF = Uses.find(N);
+ assert(UF != Uses.end());
+ UseSet &Us = UF->second;
+ // Loads/stores that use the node N.
+ UseSet LSs;
+ for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) {
+ Use *U = *J;
+ User *R = U->getUser();
+ // We're interested in uses that provide the address. It can happen
+ // that the value may also be provided via GEP, but we won't handle
+ // those cases here for now.
+ if (LoadInst *Ld = dyn_cast<LoadInst>(R)) {
+ unsigned PtrX = LoadInst::getPointerOperandIndex();
+ if (&Ld->getOperandUse(PtrX) == U)
+ LSs.insert(U);
+ } else if (StoreInst *St = dyn_cast<StoreInst>(R)) {
+ unsigned PtrX = StoreInst::getPointerOperandIndex();
+ if (&St->getOperandUse(PtrX) == U)
+ LSs.insert(U);
+ }
+ }
+ // Even if the total use count is 1, separating the chain may still be
+ // beneficial, since the constant chain may be longer than the GEP alone
+ // would be (e.g. if the parent node has a constant index and also has
+ // other children).
+ if (!LSs.empty())
+ FNs.insert(std::make_pair(N, LSs));
+ }
+
+ DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs);
+
+ for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) {
+ GepNode *N = I->first;
+ UseSet &Us = I->second;
+ for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J)
+ separateChainForNode(N, *J, Loc);
+ }
+}
+
+
+void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) {
+ // Compute the inverse of the Node.Parent links. Also, collect the set
+ // of root nodes.
+ NodeChildrenMap NCM;
+ NodeVect Roots;
+ invert_find_roots(Nodes, NCM, Roots);
+
+ // Compute the initial placement determined by the users' locations, and
+ // the locations of the child nodes.
+ for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
+ recalculatePlacementRec(*I, NCM, Loc);
+
+ DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc));
+
+ if (OptEnableInv) {
+ for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
+ adjustForInvariance(*I, NCM, Loc);
+
+ DEBUG(dbgs() << "Node placement after adjustment for invariance:\n"
+ << LocationAsBlock(Loc));
+ }
+ if (OptEnableConst) {
+ for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
+ separateConstantChains(*I, NCM, Loc);
+ }
+ DEBUG(dbgs() << "Node use information:\n" << Uses);
+
+ // At the moment, there is no further refinement of the initial placement.
+ // Such a refinement could include splitting the nodes if they are placed
+ // too far from some of its users.
+
+ DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc));
+}
+
+
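+// Illustrative note: a chain of nodes is turned back into one or more GEP
+// instructions named "cgep". If the first node's type is not a pointer
+// (i.e. the chain continues an already-dereferenced value), an artificial
+// leading "i32 0" index is added; e.g. a hypothetical child chain with
+// indices (2, 1) based on %cgep becomes
+//   %cgep1 = getelementptr <ty>, <ty>* %cgep, i32 0, i32 2, i32 1
+// provided the intermediate types line up; otherwise the chain is split
+// into several consecutive GEPs.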
+Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
+ BasicBlock *LocB) {
+ DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName()
+ << " for nodes:\n" << NA);
+ unsigned Num = NA.size();
+ GepNode *RN = NA[0];
+ assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root");
+
+ Value *NewInst = 0;
+ Value *Input = RN->BaseVal;
+ Value **IdxList = new Value*[Num+1];
+ unsigned nax = 0;
+ do {
+ unsigned IdxC = 0;
+ // If the type of the input of the first node is not a pointer,
+ // we need to add an artificial i32 0 to the indices (because the
+ // actual input in the IR will be a pointer).
+ if (!NA[nax]->PTy->isPointerTy()) {
+ Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ IdxList[IdxC++] = ConstantInt::get(Int32Ty, 0);
+ }
+
+ // Keep adding indices from NA until we have to stop and generate
+ // an "intermediate" GEP.
+ while (++nax <= Num) {
+ GepNode *N = NA[nax-1];
+ IdxList[IdxC++] = N->Idx;
+ if (nax < Num) {
+ // We have to stop, if the expected type of the output of this node
+ // is not the same as the input type of the next node.
+ Type *NextTy = next_type(N->PTy, N->Idx);
+ if (NextTy != NA[nax]->PTy)
+ break;
+ }
+ }
+ ArrayRef<Value*> A(IdxList, IdxC);
+ Type *InpTy = Input->getType();
+ Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
+ NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At);
+ DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
+ Input = NewInst;
+ } while (nax <= Num);
+
+ delete[] IdxList;
+ return NewInst;
+}
+
+
+void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values,
+ NodeChildrenMap &NCM) {
+ NodeVect Work;
+ Work.push_back(Node);
+
+ while (!Work.empty()) {
+ NodeVect::iterator First = Work.begin();
+ GepNode *N = *First;
+ Work.erase(First);
+ if (N->Flags & GepNode::Used) {
+ NodeToUsesMap::iterator UF = Uses.find(N);
+ assert(UF != Uses.end() && "No use information for used node");
+ UseSet &Us = UF->second;
+ for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I)
+ Values.push_back((*I)->getUser());
+ }
+ NodeChildrenMap::iterator CF = NCM.find(N);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ Work.insert(Work.end(), Cs.begin(), Cs.end());
+ }
+ }
+}
+
+
+void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
+ DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n');
+ NodeChildrenMap NCM;
+ NodeVect Roots;
+ // Compute the inversion again, since computing placement could alter
+ // "parent" relation between nodes.
+ invert_find_roots(Nodes, NCM, Roots);
+
+ while (!Roots.empty()) {
+ NodeVect::iterator First = Roots.begin();
+ GepNode *Root = *First, *Last = *First;
+ Roots.erase(First);
+
+ NodeVect NA; // Nodes to assemble.
+ // Append to NA all child nodes up to (and including) the first child
+ // that:
+ // (1) has more than 1 child, or
+ // (2) is used, or
+ // (3) has a child located in a different block.
+ bool LastUsed = false;
+ unsigned LastCN = 0;
+ // The location may be null if the computation failed (it can legitimately
+ // happen for nodes created from dead GEPs).
+ Value *LocV = Loc[Last];
+ if (!LocV)
+ continue;
+ BasicBlock *LastB = cast<BasicBlock>(LocV);
+ do {
+ NA.push_back(Last);
+ LastUsed = (Last->Flags & GepNode::Used);
+ if (LastUsed)
+ break;
+ NodeChildrenMap::iterator CF = NCM.find(Last);
+ LastCN = (CF != NCM.end()) ? CF->second.size() : 0;
+ if (LastCN != 1)
+ break;
+ GepNode *Child = CF->second.front();
+ BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]);
+ if (ChildB != 0 && LastB != ChildB)
+ break;
+ Last = Child;
+ } while (true);
+
+ BasicBlock::iterator InsertAt = LastB->getTerminator();
+ if (LastUsed || LastCN > 0) {
+ ValueVect Urs;
+ getAllUsersForNode(Root, Urs, NCM);
+ BasicBlock::iterator FirstUse = first_use_of_in_block(Urs, LastB);
+ if (FirstUse != LastB->end())
+ InsertAt = FirstUse;
+ }
+
+ // Generate a new instruction for NA.
+ Value *NewInst = fabricateGEP(NA, InsertAt, LastB);
+
+ // Convert all the children of Last node into roots, and append them
+ // to the Roots list.
+ if (LastCN > 0) {
+ NodeVect &Cs = NCM[Last];
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) {
+ GepNode *CN = *I;
+ CN->Flags &= ~GepNode::Internal;
+ CN->Flags |= GepNode::Root;
+ CN->BaseVal = NewInst;
+ Roots.push_back(CN);
+ }
+ }
+
+ // Lastly, if the Last node was used, replace all uses with the new GEP.
+ // The uses reference the original GEP values.
+ if (LastUsed) {
+ NodeToUsesMap::iterator UF = Uses.find(Last);
+ assert(UF != Uses.end() && "No use information found");
+ UseSet &Us = UF->second;
+ for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) {
+ Use *U = *I;
+ U->set(NewInst);
+ }
+ }
+ }
+}
+
+
+void HexagonCommonGEP::removeDeadCode() {
+ ValueVect BO;
+ BO.push_back(&Fn->front());
+
+ for (unsigned i = 0; i < BO.size(); ++i) {
+ BasicBlock *B = cast<BasicBlock>(BO[i]);
+ DomTreeNode *N = DT->getNode(B);
+ typedef GraphTraits<DomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType Iter;
+ for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+ BO.push_back((*I)->getBlock());
+ }
+
+ for (unsigned i = BO.size(); i > 0; --i) {
+ BasicBlock *B = cast<BasicBlock>(BO[i-1]);
+ BasicBlock::InstListType &IL = B->getInstList();
+ typedef BasicBlock::InstListType::reverse_iterator reverse_iterator;
+ ValueVect Ins;
+ for (reverse_iterator I = IL.rbegin(), E = IL.rend(); I != E; ++I)
+ Ins.push_back(&*I);
+ for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) {
+ Instruction *In = cast<Instruction>(*I);
+ if (isInstructionTriviallyDead(In))
+ In->eraseFromParent();
+ }
+ }
+}
+
+
+bool HexagonCommonGEP::runOnFunction(Function &F) {
+ // For now bail out on C++ exception handling.
+ for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A)
+ for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I)
+ if (isa<InvokeInst>(I) || isa<LandingPadInst>(I))
+ return false;
+
+ Fn = &F;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PDT = &getAnalysis<PostDominatorTree>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ Ctx = &F.getContext();
+
+ Nodes.clear();
+ Uses.clear();
+ NodeOrder.clear();
+
+ SpecificBumpPtrAllocator<GepNode> Allocator;
+ Mem = &Allocator;
+
+ collect();
+ common();
+
+ NodeToValueMap Loc;
+ computeNodePlacement(Loc);
+ materialize(Loc);
+ removeDeadCode();
+
+#ifdef XDEBUG
+ // Run this only when expensive checks are enabled.
+ verifyFunction(F);
+#endif
+ return true;
+}
+
+
+namespace llvm {
+ FunctionPass *createHexagonCommonGEP() {
+ return new HexagonCommonGEP();
+ }
+}
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 37ed173a79cd..ce10aeadef94 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -1,3 +1,12 @@
+//===--- HexagonExpandCondsets.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
// Replace mux instructions with the corresponding legal instructions.
// It is meant to work post-SSA, but still on virtual registers. It was
// originally placed between register coalescing and machine instruction
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 868f87e18413..29283c81877e 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -864,13 +864,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
// Check for an unused caller-saved register.
for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
MCPhysReg FreeReg = *CallerSavedRegs;
- if (MRI.isPhysRegUsed(FreeReg))
+ if (!MRI.reg_nodbg_empty(FreeReg))
continue;
// Check aliased register usage.
bool IsCurrentRegUsed = false;
for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
- if (MRI.isPhysRegUsed(*AI)) {
+ if (!MRI.reg_nodbg_empty(*AI)) {
IsCurrentRegUsed = true;
break;
}
@@ -959,8 +959,11 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF)
}
-void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan(
- MachineFunction &MF, RegScavenger* RS) const {
+void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
auto &HRI = *HST.getRegisterInfo();
@@ -969,11 +972,9 @@ void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan(
// If we have a function containing __builtin_eh_return we want to spill and
// restore all callee saved registers. Pretend that they are used.
if (HasEHReturn) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs;
++CSRegs)
- if (!MRI.isPhysRegUsed(*CSRegs))
- MRI.setPhysRegUsed(*CSRegs);
+ SavedRegs.set(*CSRegs);
}
const TargetRegisterClass &RC = Hexagon::IntRegsRegClass;
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 89500cb85724..d39ee2c77195 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -45,7 +45,7 @@ public:
MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
bool targetHandlesStackFrameRounding() const override {
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
new file mode 100644
index 000000000000..4d32208bd5aa
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -0,0 +1,259 @@
+//===--- HexagonGenExtract.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U),
+ cl::Hidden, cl::desc("Cutoff for generating \"extract\""
+ " instructions"));
+
+// This prevents generating extract instructions that have the offset of 0.
+// One of the reasons for "extract" is to put a sequence of bits in a
+// register, starting at offset 0 (so that these bits can then be used by an
+// "insert"). If the bits are already at offset 0, it is better not to
+// generate "extract", since logical bit operations can be merged into
+// compound instructions (as opposed to "extract").
+static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden,
+ cl::desc("No extract instruction with offset 0"));
+
+static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden,
+ cl::desc("Require & in extract patterns"));
+
+namespace llvm {
+ void initializeHexagonGenExtractPass(PassRegistry&);
+ FunctionPass *createHexagonGenExtract();
+}
+
+
+namespace {
+ class HexagonGenExtract : public FunctionPass {
+ public:
+ static char ID;
+ HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) {
+ initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const override {
+ return "Hexagon generate \"extract\" instructions";
+ }
+ virtual bool runOnFunction(Function &F) override;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ private:
+ bool visitBlock(BasicBlock *B);
+ bool convert(Instruction *In);
+
+ unsigned ExtractCount;
+ DominatorTree *DT;
+ };
+
+ char HexagonGenExtract::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate "
+ "\"extract\" instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate "
+ "\"extract\" instructions", false, false)
+
+
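+// Worked example of the matching and width computation below (assuming a
+// 32-bit value %x):
+//   %t = lshr i32 %x, 3
+//   %u = shl  i32 %t, 5
+//   %v = and  i32 %u, 8160        ; 8160 == 0x1FE0, bits 5..12
+// gives SR=3, SL=5, M = 0x1FE0 >> 5 = 0xFF, T=8, U = 32-max(5,3) = 27,
+// W = min(U,T) = 8, and is rewritten to (roughly)
+//   %e = call i32 @llvm.hexagon.S2.extractu(i32 %x, i32 8, i32 3)
+//   %v = shl i32 %e, 5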
+bool HexagonGenExtract::convert(Instruction *In) {
+ using namespace PatternMatch;
+ Value *BF = 0;
+ ConstantInt *CSL = 0, *CSR = 0, *CM = 0;
+ BasicBlock *BB = In->getParent();
+ LLVMContext &Ctx = BB->getContext();
+ bool LogicalSR;
+
+ // (and (shl (lshr x, #sr), #sl), #m)
+ LogicalSR = true;
+ bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+
+ if (!Match) {
+ // (and (shl (ashr x, #sr), #sl), #m)
+ LogicalSR = false;
+ Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ // (and (shl x, #sl), #m)
+ LogicalSR = true;
+ CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+ if (Match && NoSR0)
+ return false;
+ }
+ if (!Match) {
+ // (and (lshr x, #sr), #m)
+ LogicalSR = true;
+ CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ // (and (ashr x, #sr), #m)
+ LogicalSR = false;
+ CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ CM = 0;
+ // (shl (lshr x, #sr), #sl)
+ LogicalSR = true;
+ Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)));
+ }
+ if (!Match) {
+ CM = 0;
+ // (shl (ashr x, #sr), #sl)
+ LogicalSR = false;
+ Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)));
+ }
+ if (!Match)
+ return false;
+
+ Type *Ty = BF->getType();
+ if (!Ty->isIntegerTy())
+ return false;
+ unsigned BW = Ty->getPrimitiveSizeInBits();
+ if (BW != 32 && BW != 64)
+ return false;
+
+ uint32_t SR = CSR->getZExtValue();
+ uint32_t SL = CSL->getZExtValue();
+
+ if (!CM) {
+ // If there was no and, and the shift left did not remove all potential
+ // sign bits created by the shift right, then extractu cannot reproduce
+ // this value.
+ if (!LogicalSR && (SR > SL))
+ return false;
+ APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
+ CM = ConstantInt::get(Ctx, A);
+ }
+
+ // CM is the shifted-left mask. Shift it back right to remove the zero
+ // bits in the least-significant positions.
+ APInt M = CM->getValue().lshr(SL);
+ uint32_t T = M.countTrailingOnes();
+
+ // During the shifts some of the bits will be lost. Calculate how many
+ // bits of the original value will remain after the shift right and then
+ // the shift left.
+ uint32_t U = BW - std::max(SL, SR);
+ // The width of the extracted field is the minimum of the original bits
+ // that remain after the shifts and the number of contiguous 1s in the mask.
+ uint32_t W = std::min(U, T);
+ if (W == 0)
+ return false;
+
+ // Check if the extracted bits are contained within the mask that the
+ // value is and-ed with. The extract operation will copy these bits, and
+ // so the mask cannot have any holes in it that would clear any of the
+ // bits of the extracted field.
+ if (!LogicalSR) {
+ // If the shift right was arithmetic, it could have included some 1 bits.
+ // It is still ok to generate extract, but only if the mask eliminates
+ // those bits (i.e. M does not have any bits set beyond U).
+ APInt C = APInt::getHighBitsSet(BW, BW-U);
+ if (M.intersects(C) || !APIntOps::isMask(W, M))
+ return false;
+ } else {
+ // Check if M starts with a contiguous sequence of W times 1 bits. Get
+ // the low U bits of M (which eliminates the 0 bits shifted in on the
+ // left), and check if the result is APInt's "mask":
+ if (!APIntOps::isMask(W, M.getLoBits(U)))
+ return false;
+ }
+
+ IRBuilder<> IRB(BB, In);
+ Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
+ : Intrinsic::hexagon_S2_extractup;
+ Module *Mod = BB->getParent()->getParent();
+ Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
+ Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
+ if (SL != 0)
+ NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
+ In->replaceAllUsesWith(NewIn);
+ return true;
+}
+
+
+bool HexagonGenExtract::visitBlock(BasicBlock *B) {
+ // Depth-first, bottom-up traversal.
+ DomTreeNode *DTN = DT->getNode(B);
+ typedef GraphTraits<DomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType Iter;
+ for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
+ visitBlock((*I)->getBlock());
+
+ // Allow limiting the number of generated extracts for debugging purposes.
+ bool HasCutoff = ExtractCutoff.getPosition();
+ unsigned Cutoff = ExtractCutoff;
+
+ bool Changed = false;
+ BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin();
+ while (true) {
+ if (HasCutoff && (ExtractCount >= Cutoff))
+ return Changed;
+ bool Last = (I == Begin);
+ if (!Last)
+ NextI = std::prev(I);
+ Instruction *In = &*I;
+ bool Done = convert(In);
+ if (HasCutoff && Done)
+ ExtractCount++;
+ Changed |= Done;
+ if (Last)
+ break;
+ I = NextI;
+ }
+ return Changed;
+}
+
+
+bool HexagonGenExtract::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ bool Changed;
+
+ // Traverse the function bottom-up, to see super-expressions before their
+ // sub-expressions.
+ BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F);
+ Changed = visitBlock(Entry);
+
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonGenExtract() {
+ return new HexagonGenExtract();
+}
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
new file mode 100644
index 000000000000..096da949e77b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -0,0 +1,1598 @@
+//===--- HexagonGenInsert.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexinsert"
+
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+#include <map>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation."));
+// The distance cutoff is selected based on the precheckin-perf results:
+// cutoffs 20, 25, 35, and 40 are worse than 30.
+static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert "
+ "generation."));
+
+static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable timing of insert generation"));
+static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert "
+ "generation"));
+
+static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore);
+static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore);
+// Whether to construct constant values via "insert". Could eliminate constant
+// extenders, but often not practical.
+static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore);
+
+namespace {
+ // The preprocessor gets confused when the DEBUG macro is passed larger
+ // chunks of code. Use this function to detect debugging.
+ inline bool isDebug() {
+#ifndef NDEBUG
+ return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE);
+#else
+ return false;
+#endif
+ }
+}
+
+
+namespace {
+ // Set of virtual registers, based on BitVector.
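+ // Virtual registers are mapped to bit positions via virtReg2Index, so
+ // e.g. the first virtual register created in a function occupies bit 0.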
+ struct RegisterSet : private BitVector {
+ RegisterSet() : BitVector() {}
+ explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+ RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+
+ using BitVector::clear;
+
+ unsigned find_first() const {
+ int First = BitVector::find_first();
+ if (First < 0)
+ return 0;
+ return x2v(First);
+ }
+
+ unsigned find_next(unsigned Prev) const {
+ int Next = BitVector::find_next(v2x(Prev));
+ if (Next < 0)
+ return 0;
+ return x2v(Next);
+ }
+
+ RegisterSet &insert(unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return static_cast<RegisterSet&>(BitVector::set(Idx));
+ }
+ RegisterSet &remove(unsigned R) {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return *this;
+ return static_cast<RegisterSet&>(BitVector::reset(Idx));
+ }
+
+ RegisterSet &insert(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+ }
+ RegisterSet &remove(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::reset(Rs));
+ }
+
+ reference operator[](unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return BitVector::operator[](Idx);
+ }
+ bool operator[](unsigned R) const {
+ unsigned Idx = v2x(R);
+ assert(Idx < size());
+ return BitVector::operator[](Idx);
+ }
+ bool has(unsigned R) const {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return false;
+ return BitVector::test(Idx);
+ }
+
+ bool empty() const {
+ return !BitVector::any();
+ }
+ bool includes(const RegisterSet &Rs) const {
+ // A.BitVector::test(B) <=> A-B != {}
+ return !Rs.BitVector::test(*this);
+ }
+ bool intersects(const RegisterSet &Rs) const {
+ return BitVector::anyCommon(Rs);
+ }
+
+ private:
+ void ensure(unsigned Idx) {
+ if (size() <= Idx)
+ resize(std::max(Idx+1, 32U));
+ }
+ static inline unsigned v2x(unsigned v) {
+ return TargetRegisterInfo::virtReg2Index(v);
+ }
+ static inline unsigned x2v(unsigned x) {
+ return TargetRegisterInfo::index2VirtReg(x);
+ }
+ };
+
+
+ struct PrintRegSet {
+ PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
+ : RS(S), TRI(RI) {}
+ friend raw_ostream &operator<< (raw_ostream &OS,
+ const PrintRegSet &P);
+ private:
+ const RegisterSet &RS;
+ const TargetRegisterInfo *TRI;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
+ OS << '{';
+ for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
+ OS << ' ' << PrintReg(R, P.TRI);
+ OS << " }";
+ return OS;
+ }
+}
+
+
+namespace {
+ // A convenience class to associate unsigned numbers (such as virtual
+ // registers) with unsigned numbers.
+ struct UnsignedMap : public DenseMap<unsigned,unsigned> {
+ UnsignedMap() : BaseType() {}
+ private:
+ typedef DenseMap<unsigned,unsigned> BaseType;
+ };
+
+ // A utility to establish an ordering between virtual registers:
+ // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
+ // This is meant as a cache for the ordering of virtual registers defined
+ // by a potentially expensive comparison function, or obtained by a
+ // procedure that should not be repeated each time two registers are
+ // compared.
+ struct RegisterOrdering : public UnsignedMap {
+ RegisterOrdering() : UnsignedMap() {}
+ unsigned operator[](unsigned VR) const {
+ const_iterator F = find(VR);
+ assert(F != end());
+ return F->second;
+ }
+ // Add operator(), so that objects of this class can be used as
+ // comparators in std::sort et al.
+ bool operator() (unsigned VR1, unsigned VR2) const {
+ return operator[](VR1) < operator[](VR2);
+ }
+ };
+}
+
+
+namespace {
+ // Ordering of bit values. This class does not have operator[], but
+ // it supplies a comparison operator() for use in std:: algorithms.
+ // The order is as follows:
+ // - 0 < 1 < ref
+ // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
+ // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
+ struct BitValueOrdering {
+ BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
+ bool operator() (const BitTracker::BitValue &V1,
+ const BitTracker::BitValue &V2) const;
+ const RegisterOrdering &BaseOrd;
+ };
+}
+
+
+bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
+ const BitTracker::BitValue &V2) const {
+ if (V1 == V2)
+ return false;
+ // V1==0 => true, V2==0 => false
+ if (V1.is(0) || V2.is(0))
+ return V1.is(0);
+ // Neither of V1,V2 is 0, and V1!=V2.
+ // V2==1 => false, V1==1 => true
+ if (V2.is(1) || V1.is(1))
+ return !V2.is(1);
+ // Both V1,V2 are refs.
+ unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
+ if (Ind1 != Ind2)
+ return Ind1 < Ind2;
+ // Both are refs to the same register; their positions must differ.
+ assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
+ return V1.RefI.Pos < V2.RefI.Pos;
+}
+
+
+namespace {
+ // Cache for the BitTracker's cell map. Map lookup has a logarithmic
+ // complexity, this class will memoize the lookup results to reduce
+ // the access time for repeated lookups of the same cell.
+ struct CellMapShadow {
+ CellMapShadow(const BitTracker &T) : BT(T) {}
+ const BitTracker::RegisterCell &lookup(unsigned VR) {
+ unsigned RInd = TargetRegisterInfo::virtReg2Index(VR);
+ // Grow the vector to cover the index (at least 32 elements, with slack).
+ if (RInd >= CVect.size())
+ CVect.resize(std::max(RInd+16, 32U), 0);
+ const BitTracker::RegisterCell *CP = CVect[RInd];
+ if (CP == 0)
+ CP = CVect[RInd] = &BT.lookup(VR);
+ return *CP;
+ }
+
+ const BitTracker &BT;
+
+ private:
+ typedef std::vector<const BitTracker::RegisterCell*> CellVectType;
+ CellVectType CVect;
+ };
+}
+
+
+namespace {
+ // Comparator class for lexicographic ordering of virtual registers
+ // according to the corresponding BitTracker::RegisterCell objects.
+ struct RegisterCellLexCompare {
+ RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
+ : BitOrd(BO), CM(M) {}
+ bool operator() (unsigned VR1, unsigned VR2) const;
+ private:
+ const BitValueOrdering &BitOrd;
+ CellMapShadow &CM;
+ };
+
+ // Comparator class for lexicographic ordering of virtual registers
+ // according to the specified bits of the corresponding BitTracker::
+ // RegisterCell objects.
+ // Specifically, this class will be used to compare bit B of a register
+ // cell for a selected virtual register R with bit N of any register
+ // other than R.
+ struct RegisterCellBitCompareSel {
+ RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
+ const BitValueOrdering &BO, CellMapShadow &M)
+ : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
+ bool operator() (unsigned VR1, unsigned VR2) const;
+ private:
+ const unsigned SelR, SelB;
+ const unsigned BitN;
+ const BitValueOrdering &BitOrd;
+ CellMapShadow &CM;
+ };
+}
+
+
+bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
+ // Ordering of registers, made up from two given orderings:
+ // - the ordering of the register numbers, and
+ // - the ordering of register cells.
+ // Def. R1 < R2 if:
+ // - cell(R1) < cell(R2), or
+ // - cell(R1) == cell(R2), and index(R1) < index(R2).
+ //
+ // For register cells, the ordering is lexicographic, with index 0 being
+ // the most significant.
+ if (VR1 == VR2)
+ return false;
+
+ const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
+ uint16_t W1 = RC1.width(), W2 = RC2.width();
+ for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
+ const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
+ if (V1 != V2)
+ return BitOrd(V1, V2);
+ }
+ // Cells are equal up until the common length.
+ if (W1 != W2)
+ return W1 < W2;
+
+ return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
+}
+
+
+bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
+ if (VR1 == VR2)
+ return false;
+ const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
+ const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
+ uint16_t W1 = RC1.width(), W2 = RC2.width();
+ uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
+ uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
+ // If Bit1 exceeds the width of VR1, then:
+ // - return false, if at the same time Bit2 exceeds VR2, or
+ // - return true, otherwise.
+ // (I.e. "a bit value that does not exist is less than any bit value
+ // that does exist".)
+ if (W1 <= Bit1)
+ return Bit2 < W2;
+ // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
+ if (W2 <= Bit2)
+ return false;
+
+ const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
+ if (V1 != V2)
+ return BitOrd(V1, V2);
+ return false;
+}
+
+
+namespace {
+ class OrderedRegisterList {
+ typedef std::vector<unsigned> ListType;
+ public:
+ OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {}
+ void insert(unsigned VR);
+ void remove(unsigned VR);
+ unsigned operator[](unsigned Idx) const {
+ assert(Idx < Seq.size());
+ return Seq[Idx];
+ }
+ unsigned size() const {
+ return Seq.size();
+ }
+
+ typedef ListType::iterator iterator;
+ typedef ListType::const_iterator const_iterator;
+ iterator begin() { return Seq.begin(); }
+ iterator end() { return Seq.end(); }
+ const_iterator begin() const { return Seq.begin(); }
+ const_iterator end() const { return Seq.end(); }
+
+ // Convenience function to convert an iterator to the corresponding index.
+ unsigned idx(iterator It) const { return It-begin(); }
+ private:
+ ListType Seq;
+ const RegisterOrdering &Ord;
+ };
+
+
+ struct PrintORL {
+ PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
+ : RL(L), TRI(RI) {}
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
+ private:
+ const OrderedRegisterList &RL;
+ const TargetRegisterInfo *TRI;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
+ OS << '(';
+ OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
+ for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
+ if (I != B)
+ OS << ", ";
+ OS << PrintReg(*I, P.TRI);
+ }
+ OS << ')';
+ return OS;
+ }
+}
+
+
+void OrderedRegisterList::insert(unsigned VR) {
+ iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+ if (L == Seq.end())
+ Seq.push_back(VR);
+ else
+ Seq.insert(L, VR);
+}
+
+
+void OrderedRegisterList::remove(unsigned VR) {
+ iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+ assert(L != Seq.end());
+ Seq.erase(L);
+}
+
+
+namespace {
+ // A record of the insert form. The fields correspond to the operands
+ // of the "insert" instruction:
+ // ... = insert(SrcR, InsR, #Wdh, #Off)
+ struct IFRecord {
+ IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
+ : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
+ unsigned SrcR, InsR;
+ uint16_t Wdh, Off;
+ };
+
+ struct PrintIFR {
+ PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
+ : IFR(R), TRI(RI) {}
+ private:
+ const IFRecord &IFR;
+ const TargetRegisterInfo *TRI;
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
+ unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
+ OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI)
+ << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
+ return OS;
+ }
+
+ typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet;
+}
+
+
+namespace llvm {
+ void initializeHexagonGenInsertPass(PassRegistry&);
+ FunctionPass *createHexagonGenInsert();
+}
+
+
+namespace {
+ class HexagonGenInsert : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) {
+ initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "Hexagon generate \"insert\" instructions";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType;
+
+ void buildOrderingMF(RegisterOrdering &RO) const;
+ void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
+ bool isIntClass(const TargetRegisterClass *RC) const;
+ bool isConstant(unsigned VR) const;
+ bool isSmallConstant(unsigned VR) const;
+ bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
+ uint16_t L, uint16_t S) const;
+ bool findSelfReference(unsigned VR) const;
+ bool findNonSelfReference(unsigned VR) const;
+ void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
+ void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
+ unsigned distance(const MachineBasicBlock *FromB,
+ const MachineBasicBlock *ToB, const UnsignedMap &RPO,
+ PairMapType &M) const;
+ unsigned distance(MachineBasicBlock::const_iterator FromI,
+ MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
+ PairMapType &M) const;
+ bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
+ void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
+ void findRemovableRegisters(unsigned VR, IFRecord IF,
+ RegisterSet &RMs) const;
+ void computeRemovableRegisters();
+
+ void pruneEmptyLists();
+ void pruneCoveredSets(unsigned VR);
+ void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
+ void pruneRegCopies(unsigned VR);
+ void pruneCandidates();
+ void selectCandidates();
+ bool generateInserts();
+
+ bool removeDeadCode(MachineDomTreeNode *N);
+
+ // IFRecord coupled with a set of potentially removable registers:
+ typedef std::vector<IFRecordWithRegSet> IFListType;
+ typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType
+
+ void dump_map() const;
+
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+ MachineFunction *MFN;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ CellMapShadow *CMS;
+
+ RegisterOrdering BaseOrd;
+ RegisterOrdering CellOrd;
+ IFMapType IFMap;
+ };
+
+ char HexagonGenInsert::ID = 0;
+}
+
+
+void HexagonGenInsert::dump_map() const {
+ typedef IFMapType::const_iterator iterator;
+ for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ dbgs() << " " << PrintReg(I->first, HRI) << ":\n";
+ const IFListType &LL = I->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", "
+ << PrintRegSet(LL[i].second, HRI) << '\n';
+ }
+}
+
+
+void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
+ unsigned Index = 0;
+ typedef MachineFunction::const_iterator mf_iterator;
+ for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) {
+ const MachineBasicBlock &B = *A;
+ if (!CMS->BT.reached(&B))
+ continue;
+ typedef MachineBasicBlock::const_iterator mb_iterator;
+ for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned R = MO.getReg();
+ assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
+ if (TargetRegisterInfo::isVirtualRegister(R))
+ RO.insert(std::make_pair(R, Index++));
+ }
+ }
+ }
+ }
+ // Since some virtual registers may have had their def and uses eliminated,
+ // they are no longer referenced in the code, and so they will not appear
+ // in the map.
+}
+
+
+void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
+ RegisterOrdering &RO) const {
+ // Create a vector of all virtual registers (collect them from the base
+ // ordering RB), and then sort it using the RegisterCell comparator.
+ BitValueOrdering BVO(RB);
+ RegisterCellLexCompare LexCmp(BVO, *CMS);
+ typedef std::vector<unsigned> SortableVectorType;
+ SortableVectorType VRs;
+ for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
+ VRs.push_back(I->first);
+ std::sort(VRs.begin(), VRs.end(), LexCmp);
+ // Transfer the results to the outgoing register ordering.
+ for (unsigned i = 0, n = VRs.size(); i < n; ++i)
+ RO.insert(std::make_pair(VRs[i], i));
+}
+
+
+inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
+ return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
+}
+
+
+bool HexagonGenInsert::isConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(0) || BV.is(1))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ if (W > 64)
+ return false;
+ uint64_t V = 0, B = 1;
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(1))
+ V |= B;
+ else if (!BV.is(0))
+ return false;
+ B <<= 1;
+ }
+
+ // For 32-bit registers, consider: Rd = #s16.
+ if (W == 32)
+ return isInt<16>(V);
+
+ // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
+ return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
+}
+
+
+bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
+ unsigned InsR, uint16_t L, uint16_t S) const {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
+ const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
+ // Only integer (32-/64-bit) register classes.
+ if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
+ return false;
+ // The "source" register must be of the same class as DstR.
+ if (DstRC != SrcRC)
+ return false;
+ if (DstRC == InsRC)
+ return true;
+ // A 64-bit register can only be generated from other 64-bit registers.
+ if (DstRC == &Hexagon::DoubleRegsRegClass)
+ return false;
+ // Otherwise, the L and S cannot span 32-bit word boundary.
+ if (S < 32 && S+L > 32)
+ return false;
+ return true;
+}
+
+
+bool HexagonGenInsert::findSelfReference(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
+ BitTracker::RegisterCell RC = CMS->lookup(VR);
+ for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
+ return true;
+ }
+ return false;
+}
+
+
+void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
+ RegisterSet &Defs) const {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Defs.insert(R);
+ }
+}
+
+
+void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
+ RegisterSet &Uses) const {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.insert(R);
+ }
+}
+
+
+unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
+ const MachineBasicBlock *ToB, const UnsignedMap &RPO,
+ PairMapType &M) const {
+ // Forward distance from the end of a block to the beginning of it does
+ // not make sense. This function should not be called with FromB == ToB.
+ assert(FromB != ToB);
+
+ unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
+ // If we have already computed it, return the cached result.
+ PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
+ if (F != M.end())
+ return F->second;
+ unsigned ToRPO = RPO.lookup(ToN);
+
+ unsigned MaxD = 0;
+ typedef MachineBasicBlock::const_pred_iterator pred_iterator;
+ for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *PB = *I;
+ // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
+ // along that path will be 0, and we don't need to do any calculations
+ // on it.
+ if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
+ continue;
+ unsigned D = PB->size() + distance(FromB, PB, RPO, M);
+ if (D > MaxD)
+ MaxD = D;
+ }
+
+ // Memoize the result for later lookup.
+ M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
+ return MaxD;
+}
+
+
+unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
+ MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
+ PairMapType &M) const {
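+ // The distance is the plain iterator distance if both instructions are
+ // in the same block; otherwise it is the number of instructions from
+ // FromI to the end of its block, plus the maximum block-to-block
+ // distance, plus the number of instructions preceding ToI in its block.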
+ const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
+ if (FB == TB)
+ return std::distance(FromI, ToI);
+ unsigned D1 = std::distance(TB->begin(), ToI);
+ unsigned D2 = distance(FB, TB, RPO, M);
+ unsigned D3 = std::distance(FromI, FB->end());
+ return D1+D2+D3;
+}
+
+
+bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
+ OrderedRegisterList &AVs) {
+ if (isDebug()) {
+ dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI)
+ << " AVs: " << PrintORL(AVs, HRI) << "\n";
+ }
+ if (AVs.size() == 0)
+ return false;
+
+ typedef OrderedRegisterList::iterator iterator;
+ BitValueOrdering BVO(BaseOrd);
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+
+ typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift)
+ typedef std::vector<RSRecord> RSListType;
+ // Have a map, with key being the matching prefix length, and the value
+ // being the list of pairs (R,S), where R's prefix matches VR at S.
+ // (DenseMap<uint16_t,RSListType> fails to instantiate.)
+ typedef DenseMap<unsigned,RSListType> LRSMapType;
+ LRSMapType LM;
+
+ // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
+ // and find matching prefixes from AVs with the rotated RC. Such a prefix
+ // would match a string of bits (of length L) in RC starting at S.
+ for (uint16_t S = 0; S < W; ++S) {
+ iterator B = AVs.begin(), E = AVs.end();
+ // The registers in AVs are ordered according to the lexical order of
+ // the corresponding register cells. This means that the range of
+ // registers in AVs that match a prefix of length L+1 will be contained
+ // in the range that matches a prefix of length L, so the search space
+ // can keep narrowing as the prefix length goes up. This helps reduce
+ // the overall complexity of the search.
+ uint16_t L;
+ for (L = 0; L < W-S; ++L) {
+ // Compare against VR's bits starting at S, which emulates rotation
+ // of VR by S.
+ RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
+ iterator NewB = std::lower_bound(B, E, VR, RCB);
+ iterator NewE = std::upper_bound(NewB, E, VR, RCB);
+ // For the registers that are eliminated from the next range, L is
+ // the longest prefix matching VR at position S (their prefixes
+ // differ from VR at S+L). If L>0, record this information for later
+ // use.
+ if (L > 0) {
+ for (iterator I = B; I != NewB; ++I)
+ LM[L].push_back(std::make_pair(*I, S));
+ for (iterator I = NewE; I != E; ++I)
+ LM[L].push_back(std::make_pair(*I, S));
+ }
+ B = NewB, E = NewE;
+ if (B == E)
+ break;
+ }
+ // Record the final register range. If this range is non-empty, then
+ // L=W-S.
+ assert(B == E || L == W-S);
+ if (B != E) {
+ for (iterator I = B; I != E; ++I)
+ LM[L].push_back(std::make_pair(*I, S));
+ // If B!=E, then we found a range of registers whose prefixes cover the
+ // rest of VR from position S. There is no need to further advance S.
+ break;
+ }
+ }
+
+ if (isDebug()) {
+ dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n";
+ for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) {
+ dbgs() << " L=" << I->first << ':';
+ const RSListType &LL = I->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@"
+ << LL[i].second << ')';
+ dbgs() << '\n';
+ }
+ }
+
+
+ bool Recorded = false;
+
+ for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) {
+ unsigned SrcR = *I;
+ int FDi = -1, LDi = -1; // First/last different bit.
+ const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
+ uint16_t AW = AC.width();
+ for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
+ if (RC[i] == AC[i])
+ continue;
+ if (FDi == -1)
+ FDi = i;
+ LDi = i;
+ }
+ if (FDi == -1)
+ continue; // TODO (future): Record identical registers.
+ // Look for a register whose prefix could patch the range [FD..LD]
+ // where VR and SrcR differ.
+ uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
+ uint16_t MinL = LD-FD+1;
+ for (uint16_t L = MinL; L < W; ++L) {
+ LRSMapType::iterator F = LM.find(L);
+ if (F == LM.end())
+ continue;
+ RSListType &LL = F->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i) {
+ uint16_t S = LL[i].second;
+ // MinL is the minimum length of the prefix. Any length above MinL
+ // allows some flexibility as to where the prefix can start:
+ // given the extra length EL=L-MinL, the prefix must start between
+ // max(0,FD-EL) and FD.
+ if (S > FD) // Starts too late.
+ continue;
+ uint16_t EL = L-MinL;
+ uint16_t LowS = (EL < FD) ? FD-EL : 0;
+ if (S < LowS) // Starts too early.
+ continue;
+ unsigned InsR = LL[i].first;
+ if (!isValidInsertForm(VR, SrcR, InsR, L, S))
+ continue;
+ if (isDebug()) {
+ dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI)
+ << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#"
+ << S << ")\n";
+ }
+ IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
+ IFMap[VR].push_back(RR);
+ Recorded = true;
+ }
+ }
+ }
+
+ return Recorded;
+}
+
+
+void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
+ OrderedRegisterList &AVs) {
+ if (isDebug())
+ dbgs() << "visiting block BB#" << B->getNumber() << "\n";
+
+ // First, check if this block is reachable at all. If not, the bit tracker
+ // will not have any information about registers in it.
+ if (!CMS->BT.reached(B))
+ return;
+
+ bool DoConst = OptConst;
+ // Keep a separate set of registers defined in this block, so that we
+ // can remove them from the list of available registers once all DT
+ // successors have been processed.
+ RegisterSet BlockDefs, InsDefs;
+ for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ InsDefs.clear();
+ getInstrDefs(MI, InsDefs);
+ // Leave those alone. They are more transparent than "insert".
+ bool Skip = MI->isCopy() || MI->isRegSequence();
+
+ if (!Skip) {
+ // Visit all defined registers, and attempt to find the corresponding
+ // "insert" representations.
+ for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
+ // Do not collect registers that are known to be compile-time
+ // constants, unless requested.
+ if (!DoConst && isConstant(VR))
+ continue;
+ // If VR's cell contains a reference to VR, then VR cannot be defined
+ // via "insert". If VR is a constant that can be generated in a single
+ // instruction (without constant extenders), generating it via insert
+ // makes no sense.
+ if (findSelfReference(VR) || isSmallConstant(VR))
+ continue;
+
+ findRecordInsertForms(VR, AVs);
+ }
+ }
+
+ // Insert the defined registers into the list of available registers
+ // after they have been processed.
+ for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
+ AVs.insert(VR);
+ BlockDefs.insert(InsDefs);
+ }
+
+ MachineDomTreeNode *N = MDT->getNode(B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType ChildIter;
+ for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ collectInBlock(SB, AVs);
+ }
+
+ for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
+ AVs.remove(VR);
+}
+
+
+void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
+ RegisterSet &RMs) const {
+ // For a given register VR and an insert form, find the registers that are
+ // used by the current definition of VR, and which would no longer be
+ // needed for it after the definition of VR is replaced with the insert
+ // form. These are the registers that could potentially become dead.
+ RegisterSet Regs[2];
+
+ unsigned S = 0; // Register set selector.
+ Regs[S].insert(VR);
+
+ while (!Regs[S].empty()) {
+ // Breadth-first search.
+ unsigned OtherS = 1-S;
+ Regs[OtherS].clear();
+ for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
+ Regs[S].remove(R);
+ if (R == IF.SrcR || R == IF.InsR)
+ continue;
+ // Check if a given register has bits that are references to any other
+ // registers. This is to detect situations where the instruction that
+ // defines register R takes register Q as an operand, but R itself does
+ // not contain any bits from Q. Loads are examples of how this could
+ // happen:
+ // R = load Q
+ // In this case (assuming we do not have any knowledge about the loaded
+ // value), we must not treat R as a "conveyance" of the bits from Q.
+ // (The information in BT about R's bits would have them as constants,
+ // in case of zero-extending loads, or refs to R.)
+ if (!findNonSelfReference(R))
+ continue;
+ RMs.insert(R);
+ const MachineInstr *DefI = MRI->getVRegDef(R);
+ assert(DefI);
+ // Do not iterate past PHI nodes to avoid infinite loops. This can
+ // make the final set a bit less accurate, but the removable register
+ // sets are an approximation anyway.
+ if (DefI->isPHI())
+ continue;
+ getInstrUses(DefI, Regs[OtherS]);
+ }
+ S = OtherS;
+ }
+ // The register VR is added to the list as a side-effect of the algorithm,
+ // but it is not "potentially removable". A potentially removable register
+ // is one that may become unused (dead) after conversion to the insert
+ // form IF, and obviously VR (or its replacement) will not become dead by
+ // applying IF.
+ RMs.remove(VR);
+}
+
+
+void HexagonGenInsert::computeRemovableRegisters() {
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ IFListType &LL = I->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ findRemovableRegisters(I->first, LL[i].first, LL[i].second);
+ }
+}
+
+
+void HexagonGenInsert::pruneEmptyLists() {
+ // Remove all entries from the map, where the register has no insert forms
+ // associated with it.
+ typedef SmallVector<IFMapType::iterator,16> IterListType;
+ IterListType Prune;
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ if (I->second.size() == 0)
+ Prune.push_back(I);
+ }
+ for (unsigned i = 0, n = Prune.size(); i < n; ++i)
+ IFMap.erase(Prune[i]);
+}
+
+
+void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
+ IFMapType::iterator F = IFMap.find(VR);
+ assert(F != IFMap.end());
+ IFListType &LL = F->second;
+
+ // First, examine the IF candidates for register VR whose removable-
+ // register sets are empty. This means that a given candidate will not
+ // help eliminate any registers, but since "insert" is not a constant-
+ // extendable instruction, using such a candidate may reduce code size
+ // if the definition of VR is constant-extended.
+ // If there exists a candidate with a non-empty set, the ones with empty
+ // sets will not be used and can be removed.
+ MachineInstr *DefVR = MRI->getVRegDef(VR);
+ bool DefEx = HII->isConstExtended(DefVR);
+ bool HasNE = false;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i) {
+ if (LL[i].second.empty())
+ continue;
+ HasNE = true;
+ break;
+ }
+ if (!DefEx || HasNE) {
+ // The definition of VR is not constant-extended, or there is a candidate
+ // with a non-empty set. Remove all candidates with empty sets.
+ auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
+ return IR.second.empty();
+ };
+ auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty);
+ if (End != LL.end())
+ LL.erase(End, LL.end());
+ } else {
+ // The definition of VR is constant-extended, and all candidates have
+ // empty removable-register sets. Pick the maximum candidate, and remove
+ // all others. The "maximum" does not have any special meaning here, it
+ // is only so that the candidate that will remain on the list is
+ // selected deterministically.
+ IFRecord MaxIF = LL[0].first;
+ for (unsigned i = 1, n = LL.size(); i < n; ++i) {
+ // If LL[MaxI] < LL[i], then MaxI = i.
+ const IFRecord &IF = LL[i].first;
+ unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
+ unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
+ if (M0 > R0)
+ continue;
+ if (M0 == R0) {
+ if (M1 > R1)
+ continue;
+ if (M1 == R1) {
+ if (MaxIF.Wdh > IF.Wdh)
+ continue;
+ if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
+ continue;
+ }
+ }
+ // MaxIF < IF.
+ MaxIF = IF;
+ }
+ // Remove everything except the maximum candidate. All register sets
+ // are empty, so no need to preserve anything.
+ LL.clear();
+ LL.push_back(std::make_pair(MaxIF, RegisterSet()));
+ }
+
+ // Now, remove those whose sets of potentially removable registers are
+ // contained in another IF candidate for VR. For example, given these
+ // candidates for vreg45,
+ // %vreg45:
+ // (%vreg44,%vreg41,#9,#8), { %vreg42 }
+ // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 }
+ // remove the first one, since it is contained in the second one.
+ for (unsigned i = 0, n = LL.size(); i < n; ) {
+ const RegisterSet &RMi = LL[i].second;
+ unsigned j = 0;
+ while (j < n) {
+ if (j != i && LL[j].second.includes(RMi))
+ break;
+ j++;
+ }
+ if (j == n) { // RMi not contained in anything else.
+ i++;
+ continue;
+ }
+ LL.erase(LL.begin()+i);
+ n = LL.size();
+ }
+}
+
+
+void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
+ PairMapType &M) {
+ IFMapType::iterator F = IFMap.find(VR);
+ assert(F != IFMap.end());
+ IFListType &LL = F->second;
+ unsigned Cutoff = VRegDistCutoff;
+ const MachineInstr *DefV = MRI->getVRegDef(VR);
+
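+ // Keep a candidate only if both the source register and the inserted
+ // register are defined no farther than the distance cutoff from the
+ // definition of VR; erase it otherwise.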
+ for (unsigned i = LL.size(); i > 0; --i) {
+ unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
+ const MachineInstr *DefS = MRI->getVRegDef(SR);
+ const MachineInstr *DefI = MRI->getVRegDef(IR);
+ unsigned DSV = distance(DefS, DefV, RPO, M);
+ if (DSV < Cutoff) {
+ unsigned DIV = distance(DefI, DefV, RPO, M);
+ if (DIV < Cutoff)
+ continue;
+ }
+ LL.erase(LL.begin()+(i-1));
+ }
+}
+
+
+void HexagonGenInsert::pruneRegCopies(unsigned VR) {
+ IFMapType::iterator F = IFMap.find(VR);
+ assert(F != IFMap.end());
+ IFListType &LL = F->second;
+
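+ // An "insert" that overwrites a full 32-bit word (width 32 at offset 0
+ // or 32) is effectively a register (or subregister) copy of InsR and
+ // offers no benefit, so such candidates are removed.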
+ auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
+ return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
+ };
+ auto End = std::remove_if(LL.begin(), LL.end(), IsCopy);
+ if (End != LL.end())
+ LL.erase(End, LL.end());
+}
+
+
+void HexagonGenInsert::pruneCandidates() {
+ // Remove candidates that are not beneficial, regardless of the final
+ // selection method.
+ // First, remove candidates whose potentially removable set is a subset
+ // of another candidate's set.
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
+ pruneCoveredSets(I->first);
+
+ UnsignedMap RPO;
+ typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
+ RPOTType RPOT(MFN);
+ unsigned RPON = 0;
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ RPO[(*I)->getNumber()] = RPON++;
+
+ PairMapType Memo; // Memoization map for distance calculation.
+ // Remove candidates that would use registers defined too far away.
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
+ pruneUsesTooFar(I->first, RPO, Memo);
+
+ pruneEmptyLists();
+
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
+ pruneRegCopies(I->first);
+}
+
+
+namespace {
+ // Class for comparing IF candidates for registers that have more than
+ // one candidate. The smaller the candidate, according to this ordering,
+ // the better.
+ // First, compare the number of zeros in the associated potentially
+ // removable register sets. "Zero" indicates that the register is very
+ // likely to become dead after this transformation.
+ // Second, compare "averages", i.e. use-count per size. The lower wins.
+ // After that, it does not really matter which one is smaller. Resolve
+ // the tie in some deterministic way.
+ struct IFOrdering {
+ IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
+ : UseC(UC), BaseOrd(BO) {}
+ bool operator() (const IFRecordWithRegSet &A,
+ const IFRecordWithRegSet &B) const;
+ private:
+ void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
+ unsigned &Sum) const;
+ const UnsignedMap &UseC;
+ const RegisterOrdering &BaseOrd;
+ };
+}
+
+
+bool IFOrdering::operator() (const IFRecordWithRegSet &A,
+ const IFRecordWithRegSet &B) const {
+ unsigned SizeA = 0, ZeroA = 0, SumA = 0;
+ unsigned SizeB = 0, ZeroB = 0, SumB = 0;
+ stats(A.second, SizeA, ZeroA, SumA);
+ stats(B.second, SizeB, ZeroB, SumB);
+
+ // We will pick the minimum element. The more zeros, the better.
+ if (ZeroA != ZeroB)
+ return ZeroA > ZeroB;
+ // Compare SumA/SizeA with SumB/SizeB, lower is better.
+ uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
+ if (AvgA != AvgB)
+ return AvgA < AvgB;
+
+ // The sets compare identical so far. Resort to comparing the IF records.
+ // The actual values don't matter, this is only for determinism.
+ unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
+ if (OSA != OSB)
+ return OSA < OSB;
+ unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
+ if (OIA != OIB)
+ return OIA < OIB;
+ if (A.first.Wdh != B.first.Wdh)
+ return A.first.Wdh < B.first.Wdh;
+ return A.first.Off < B.first.Off;
+}
+
+
+void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
+ unsigned &Sum) const {
+ for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
+ UnsignedMap::const_iterator F = UseC.find(R);
+ assert(F != UseC.end());
+ unsigned UC = F->second;
+ if (UC == 0)
+ Zero++;
+ Sum += UC;
+ Size++;
+ }
+}
+
+
+void HexagonGenInsert::selectCandidates() {
+ // Some registers may have multiple valid candidates. Pick the best one
+ // (or decide not to use any).
+
+ // Compute the "removability" measure of R:
+ // For each potentially removable register R, record the number of
+ // registers with IF candidates, where R appears in at least one set.
+ RegisterSet AllRMs;
+ UnsignedMap UseC, RemC;
+ IFMapType::iterator End = IFMap.end();
+
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ const IFListType &LL = I->second;
+ RegisterSet TT;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ TT.insert(LL[i].second);
+ for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
+ RemC[R]++;
+ AllRMs.insert(TT);
+ }
+
+ for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
+ typedef MachineRegisterInfo::use_nodbg_iterator use_iterator;
+ typedef SmallSet<const MachineInstr*,16> InstrSet;
+ InstrSet UIs;
+ // Count as the number of instructions in which R is used, not the
+ // number of operands.
+ use_iterator E = MRI->use_nodbg_end();
+ for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
+ UIs.insert(I->getParent());
+ unsigned C = UIs.size();
+ // Calculate a measure, which is the number of instructions using R,
+ // minus the "removability" count computed earlier.
+ unsigned D = RemC[R];
+ UseC[R] = (C > D) ? C-D : 0; // doz (difference or zero)
+ }
+
+
+ bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
+ if (!SelectAll0 && !SelectHas0)
+ SelectAll0 = true;
+
+ // The smaller the number UseC for a given register R, the "less used"
+ // R is aside from the opportunities for removal offered by generating
+ // "insert" instructions.
+ // Iterate over the IF map, and for those registers that have multiple
+ // candidates, pick the minimum one according to IFOrdering.
+ IFOrdering IFO(UseC, BaseOrd);
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ IFListType &LL = I->second;
+ if (LL.empty())
+ continue;
+ // Get the minimum element, remember it and clear the list. If the
+ // element found is adequate, we will put it back on the list;
+ // otherwise the list will remain empty, and the entry for this register
+ // will be removed (i.e. this register will not be replaced by insert).
+ IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO);
+ assert(MinI != LL.end());
+ IFRecordWithRegSet M = *MinI;
+ LL.clear();
+
+ // We want to make sure that this replacement will have a chance to be
+ // beneficial, and that means that we want to have indication that some
+ // register will be removed. The most likely registers to be eliminated
+ // are the use operands in the definition of I->first. Accept/reject a
+ // candidate based on how many of its uses it can potentially eliminate.
+
+ RegisterSet Us;
+ const MachineInstr *DefI = MRI->getVRegDef(I->first);
+ getInstrUses(DefI, Us);
+ bool Accept = false;
+
+ if (SelectAll0) {
+ bool All0 = true;
+ for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
+ if (UseC[R] == 0)
+ continue;
+ All0 = false;
+ break;
+ }
+ Accept = All0;
+ } else if (SelectHas0) {
+ bool Has0 = false;
+ for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
+ if (UseC[R] != 0)
+ continue;
+ Has0 = true;
+ break;
+ }
+ Accept = Has0;
+ }
+ if (Accept)
+ LL.push_back(M);
+ }
+
+ // Remove candidates that add uses of removable registers, unless the
+ // removable registers are among replacement candidates.
+ // Recompute the removable registers, since some candidates may have
+ // been eliminated.
+ AllRMs.clear();
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ const IFListType &LL = I->second;
+ if (LL.size() > 0)
+ AllRMs.insert(LL[0].second);
+ }
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ IFListType &LL = I->second;
+ if (LL.size() == 0)
+ continue;
+ unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
+ if (AllRMs[SR] || AllRMs[IR])
+ LL.clear();
+ }
+
+ pruneEmptyLists();
+}
+
+
+bool HexagonGenInsert::generateInserts() {
+ // Create a new register for each one from IFMap, and store them in the
+ // map.
+ UnsignedMap RegMap;
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ unsigned VR = I->first;
+ const TargetRegisterClass *RC = MRI->getRegClass(VR);
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ RegMap[VR] = NewVR;
+ }
+
+ // We can generate the "insert" instructions using potentially stale
+ // registers: SrcR and InsR for a given VR may be among other registers
+ // that are also replaced. This is fine, we will do the mass "rauw" a bit
+ // later.
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ MachineInstr *MI = MRI->getVRegDef(I->first);
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned NewR = RegMap[I->first];
+ bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
+ const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
+ : HII->get(Hexagon::S2_insertp);
+ IFRecord IF = I->second[0].first;
+ unsigned Wdh = IF.Wdh, Off = IF.Off;
+ unsigned InsS = 0;
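+ // If the destination is 32-bit but the register providing the inserted
+ // bits is 64-bit, use one of its 32-bit subregisters, rebasing the
+ // offset to the high word when necessary.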
+ if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
+ InsS = Hexagon::subreg_loreg;
+ if (Off >= 32) {
+ InsS = Hexagon::subreg_hireg;
+ Off -= 32;
+ }
+ }
+ // Advance to the proper location for inserting instructions. This could
+ // be B.end().
+ MachineBasicBlock::iterator At = MI;
+ if (MI->isPHI())
+ At = B.getFirstNonPHI();
+
+ BuildMI(B, At, DL, D, NewR)
+ .addReg(IF.SrcR)
+ .addReg(IF.InsR, 0, InsS)
+ .addImm(Wdh)
+ .addImm(Off);
+
+ MRI->clearKillFlags(IF.SrcR);
+ MRI->clearKillFlags(IF.InsR);
+ }
+
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ MachineInstr *DefI = MRI->getVRegDef(I->first);
+ MRI->replaceRegWith(I->first, RegMap[I->first]);
+ DefI->eraseFromParent();
+ }
+
+ return true;
+}
+
+
+bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
+ bool Changed = false;
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+ Changed |= removeDeadCode(*I);
+
+ MachineBasicBlock *B = N->getBlock();
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ unsigned Opc = MI->getOpcode();
+ // Do not touch lifetime markers. This is why the target-independent DCE
+ // cannot be used.
+ if (Opc == TargetOpcode::LIFETIME_START ||
+ Opc == TargetOpcode::LIFETIME_END)
+ continue;
+ bool Store = false;
+ if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store))
+ continue;
+
+ bool AllDead = true;
+ SmallVector<unsigned,2> Regs;
+ for (ConstMIOperands Op(MI); Op.isValid(); ++Op) {
+ if (!Op->isReg() || !Op->isDef())
+ continue;
+ unsigned R = Op->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R) ||
+ !MRI->use_nodbg_empty(R)) {
+ AllDead = false;
+ break;
+ }
+ Regs.push_back(R);
+ }
+ if (!AllDead)
+ continue;
+
+ B->erase(MI);
+ for (unsigned I = 0, N = Regs.size(); I != N; ++I)
+ MRI->markUsesInDebugValueAsUndef(Regs[I]);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
+ bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
+ bool Changed = false;
+ TimerGroup __G("hexinsert");
+ NamedRegionTimer __T("hexinsert", Timing && !TimingDetail);
+
+ // Sanity check: one, but not both.
+ assert(!OptSelectAll0 || !OptSelectHas0);
+
+ IFMap.clear();
+ BaseOrd.clear();
+ CellOrd.clear();
+
+ const auto &ST = MF.getSubtarget<HexagonSubtarget>();
+ HII = ST.getInstrInfo();
+ HRI = ST.getRegisterInfo();
+ MFN = &MF;
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Clean up before any further processing, so that dead code does not
+ // get used in a newly generated "insert" instruction. Have a custom
+ // version of DCE that preserves lifetime markers. Without it, merging
+ // of stack objects can fail to recognize and merge disjoint objects
+ // leading to unnecessary stack growth.
+ Changed |= removeDeadCode(MDT->getRootNode());
+
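+ // Run the bit tracker to determine, for each virtual register, whether
+ // each of its bits is a constant 0/1 or a reference to a bit of another
+ // register. The results are cached in CellMapShadow, since they are
+ // queried repeatedly while collecting candidates.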
+ const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+ BitTracker BTLoc(HE, MF);
+ BTLoc.trace(isDebug());
+ BTLoc.run();
+ CellMapShadow MS(BTLoc);
+ CMS = &MS;
+
+ buildOrderingMF(BaseOrd);
+ buildOrderingBT(BaseOrd, CellOrd);
+
+ if (isDebug()) {
+ dbgs() << "Cell ordering:\n";
+ for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end();
+ I != E; ++I) {
+ unsigned VR = I->first, Pos = I->second;
+ dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n";
+ }
+ }
+
+ // Collect candidates for conversion into the insert forms.
+ MachineBasicBlock *RootB = MDT->getRoot();
+ OrderedRegisterList AvailR(CellOrd);
+
+ {
+ NamedRegionTimer _T("collection", "hexinsert", TimingDetail);
+ collectInBlock(RootB, AvailR);
+ // Complete the information gathered in IFMap.
+ computeRemovableRegisters();
+ }
+
+ if (isDebug()) {
+ dbgs() << "Candidates after collection:\n";
+ dump_map();
+ }
+
+ if (IFMap.empty())
+ return false;
+
+ {
+ NamedRegionTimer _T("pruning", "hexinsert", TimingDetail);
+ pruneCandidates();
+ }
+
+ if (isDebug()) {
+ dbgs() << "Candidates after pruning:\n";
+ dump_map();
+ }
+
+ if (IFMap.empty())
+ return false;
+
+ {
+ NamedRegionTimer _T("selection", "hexinsert", TimingDetail);
+ selectCandidates();
+ }
+
+ if (isDebug()) {
+ dbgs() << "Candidates after selection:\n";
+ dump_map();
+ }
+
+ // Filter out vregs beyond the cutoff.
+ if (VRegIndexCutoff.getPosition()) {
+ unsigned Cutoff = VRegIndexCutoff;
+ typedef SmallVector<IFMapType::iterator,16> IterListType;
+ IterListType Out;
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first);
+ if (Idx >= Cutoff)
+ Out.push_back(I);
+ }
+ for (unsigned i = 0, n = Out.size(); i < n; ++i)
+ IFMap.erase(Out[i]);
+ }
+
+ {
+ NamedRegionTimer _T("generation", "hexinsert", TimingDetail);
+ Changed = generateInserts();
+ }
+
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonGenInsert() {
+ return new HexagonGenInsert();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
+ "Hexagon generate \"insert\" instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
+ "Hexagon generate \"insert\" instructions", false, false)
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
new file mode 100644
index 000000000000..6905c4f6d125
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -0,0 +1,525 @@
+//===--- HexagonGenPredicate.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gen-pred"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "HexagonTargetMachine.h"
+
+#include <functional>
+#include <queue>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonGenPredicatePass(PassRegistry& Registry);
+ FunctionPass *createHexagonGenPredicate();
+}
+
+namespace {
+ struct Register {
+ unsigned R, S;
+ Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
+ Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
+ bool operator== (const Register &Reg) const {
+ return R == Reg.R && S == Reg.S;
+ }
+ bool operator< (const Register &Reg) const {
+ return R < Reg.R || (R == Reg.R && S < Reg.S);
+ }
+ };
+ struct PrintRegister {
+ PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR);
+ private:
+ Register Reg;
+ const TargetRegisterInfo &TRI;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR)
+ LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) {
+ return OS << PrintReg(PR.Reg.R, &PR.TRI, PR.Reg.S);
+ }
+
+ class HexagonGenPredicate : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) {
+ initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "Hexagon generate predicate operations";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef SetVector<MachineInstr*> VectOfInst;
+ typedef std::set<Register> SetOfReg;
+ typedef std::map<Register,Register> RegToRegMap;
+
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ SetOfReg PredGPRs;
+ VectOfInst PUsers;
+ RegToRegMap G2P;
+
+ bool isPredReg(unsigned R);
+ void collectPredicateGPR(MachineFunction &MF);
+ void processPredicateGPR(const Register &Reg);
+ unsigned getPredForm(unsigned Opc);
+ bool isConvertibleToPredForm(const MachineInstr *MI);
+ bool isScalarCmp(unsigned Opc);
+ bool isScalarPred(Register PredReg);
+ Register getPredRegFor(const Register &Reg);
+ bool convertToPredForm(MachineInstr *MI);
+ bool eliminatePredCopies(MachineFunction &MF);
+ };
+
+ char HexagonGenPredicate::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred",
+ "Hexagon generate predicate operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred",
+ "Hexagon generate predicate operations", false, false)
+
+bool HexagonGenPredicate::isPredReg(unsigned R) {
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ return RC == &Hexagon::PredRegsRegClass;
+}
+
+
+unsigned HexagonGenPredicate::getPredForm(unsigned Opc) {
+ using namespace Hexagon;
+
+ switch (Opc) {
+ case A2_and:
+ case A2_andp:
+ return C2_and;
+ case A4_andn:
+ case A4_andnp:
+ return C2_andn;
+ case M4_and_and:
+ return C4_and_and;
+ case M4_and_andn:
+ return C4_and_andn;
+ case M4_and_or:
+ return C4_and_or;
+
+ case A2_or:
+ case A2_orp:
+ return C2_or;
+ case A4_orn:
+ case A4_ornp:
+ return C2_orn;
+ case M4_or_and:
+ return C4_or_and;
+ case M4_or_andn:
+ return C4_or_andn;
+ case M4_or_or:
+ return C4_or_or;
+
+ case A2_xor:
+ case A2_xorp:
+ return C2_xor;
+
+ case C2_tfrrp:
+ return COPY;
+ }
+ // The opcode corresponding to 0 is TargetOpcode::PHI. We can use 0 here
+ // to denote "none", but we need to make sure that none of the valid opcodes
+ // that we return will ever be 0.
+ assert(PHI == 0 && "Use different value for <none>");
+ return 0;
+}
+
+
+bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ if (getPredForm(Opc) != 0)
+ return true;
+
+ // Comparisons against 0 are also convertible. This does not apply to
+ // A4_rcmpeqi or A4_rcmpneqi, since they produce values 0 or 1, which
+ // may not match the value that the predicate register would have if
+ // it was converted to a predicate form.
+ switch (Opc) {
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C4_cmpneqi:
+ if (MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0)
+ return true;
+ break;
+ }
+ return false;
+}
+
+
+void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
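+ // Find all virtual GPRs that are defined as transfers or copies from
+ // predicate registers; their users are the candidates for conversion to
+ // the predicate form.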
+ for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) {
+ MachineBasicBlock &B = *A;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::C2_tfrpr:
+ case TargetOpcode::COPY:
+ if (isPredReg(MI->getOperand(1).getReg())) {
+ Register RD = MI->getOperand(0);
+ if (TargetRegisterInfo::isVirtualRegister(RD.R))
+ PredGPRs.insert(RD);
+ }
+ break;
+ }
+ }
+ }
+}
+
+
+void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": "
+ << PrintReg(Reg.R, TRI, Reg.S) << "\n");
+ typedef MachineRegisterInfo::use_iterator use_iterator;
+ use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end();
+ if (I == E) {
+ DEBUG(dbgs() << "Dead reg: " << PrintReg(Reg.R, TRI, Reg.S) << '\n');
+ MachineInstr *DefI = MRI->getVRegDef(Reg.R);
+ DefI->eraseFromParent();
+ return;
+ }
+
+ for (; I != E; ++I) {
+ MachineInstr *UseI = I->getParent();
+ if (isConvertibleToPredForm(UseI))
+ PUsers.insert(UseI);
+ }
+}
+
+
+Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
+ // Create a predicate register for a given Reg. The newly created register
+ // will have its value copied from Reg, so that it can be later used as
+ // an operand in other instructions.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg.R));
+ RegToRegMap::iterator F = G2P.find(Reg);
+ if (F != G2P.end())
+ return F->second;
+
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI));
+ MachineInstr *DefI = MRI->getVRegDef(Reg.R);
+ assert(DefI);
+ unsigned Opc = DefI->getOpcode();
+ if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) {
+ assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse());
+ Register PR = DefI->getOperand(1);
+ G2P.insert(std::make_pair(Reg, PR));
+ DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n');
+ return PR;
+ }
+
+ MachineBasicBlock &B = *DefI->getParent();
+ DebugLoc DL = DefI->getDebugLoc();
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ unsigned NewPR = MRI->createVirtualRegister(PredRC);
+
+ // For convertible instructions, do not modify them, so that they can
+ // be converted later. Generate a copy from Reg to NewPR.
+ if (isConvertibleToPredForm(DefI)) {
+ MachineBasicBlock::iterator DefIt = DefI;
+ BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
+ .addReg(Reg.R, 0, Reg.S);
+ G2P.insert(std::make_pair(Reg, Register(NewPR)));
+ DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n');
+ return Register(NewPR);
+ }
+
+ llvm_unreachable("Invalid argument");
+}
+
+
+bool HexagonGenPredicate::isScalarCmp(unsigned Opc) {
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgei:
+ case Hexagon::C2_cmpgeui:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonGenPredicate::isScalarPred(Register PredReg) {
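+ // Check, transitively through copies and logical operations on predicate
+ // registers, whether PredReg originates only from scalar comparisons,
+ // i.e. whether all bits of the predicate have the same value.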
+ std::queue<Register> WorkQ;
+ WorkQ.push(PredReg);
+
+ while (!WorkQ.empty()) {
+ Register PR = WorkQ.front();
+ WorkQ.pop();
+ const MachineInstr *DefI = MRI->getVRegDef(PR.R);
+ if (!DefI)
+ return false;
+ unsigned DefOpc = DefI->getOpcode();
+ switch (DefOpc) {
+ case TargetOpcode::COPY: {
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ if (MRI->getRegClass(PR.R) != PredRC)
+ return false;
+ // If it is a copy between two predicate registers, fall through.
+ }
+ case Hexagon::C2_and:
+ case Hexagon::C2_andn:
+ case Hexagon::C4_and_and:
+ case Hexagon::C4_and_andn:
+ case Hexagon::C4_and_or:
+ case Hexagon::C2_or:
+ case Hexagon::C2_orn:
+ case Hexagon::C4_or_and:
+ case Hexagon::C4_or_andn:
+ case Hexagon::C4_or_or:
+ case Hexagon::C4_or_orn:
+ case Hexagon::C2_xor:
+ // Add operands to the queue.
+ for (ConstMIOperands Mo(DefI); Mo.isValid(); ++Mo)
+ if (Mo->isReg() && Mo->isUse())
+ WorkQ.push(Register(Mo->getReg()));
+ break;
+
+ // All non-vector compares are ok, everything else is bad.
+ default:
+ return isScalarCmp(DefOpc);
+ }
+ }
+
+ return true;
+}
+
+
+bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI);
+
+ unsigned Opc = MI->getOpcode();
+ assert(isConvertibleToPredForm(MI));
+ unsigned NumOps = MI->getNumOperands();
+ for (unsigned i = 0; i < NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register Reg(MO);
+ if (Reg.S && Reg.S != Hexagon::subreg_loreg)
+ return false;
+ if (!PredGPRs.count(Reg))
+ return false;
+ }
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned NewOpc = getPredForm(Opc);
+ // Special case for comparisons against 0.
+ if (NewOpc == 0) {
+ switch (Opc) {
+ case Hexagon::C2_cmpeqi:
+ NewOpc = Hexagon::C2_not;
+ break;
+ case Hexagon::C4_cmpneqi:
+ NewOpc = TargetOpcode::COPY;
+ break;
+ default:
+ return false;
+ }
+
+ // If it's a scalar predicate register, then all bits in it are
+ // the same. Otherwise, to determine whether all bits are 0 or not
+ // we would need to use any8.
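+ // For example, if the source predicate is all-ones, the transferred GPR is
+ // non-zero, so cmp.eq(Rx, #0) is false; the comparison against 0 therefore
+ // computes the negation of the predicate (hence C2_not), and the inequality
+ // against 0 reproduces the predicate itself (hence COPY).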
+ Register PR = getPredRegFor(MI->getOperand(1));
+ if (!isScalarPred(PR))
+ return false;
+ // This will skip the immediate argument when creating the predicate
+ // version of the instruction.
+ NumOps = 2;
+ }
+
+ // Some sanity: check that def is in operand #0.
+ MachineOperand &Op0 = MI->getOperand(0);
+ assert(Op0.isDef());
+ Register OutR(Op0);
+
+ // Don't use getPredRegFor, since it will create an association between
+ // the argument and a created predicate register (i.e. it will insert a
+ // copy if a new predicate register is created).
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ Register NewPR = MRI->createVirtualRegister(PredRC);
+ MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R);
+
+ // Add predicate counterparts of the GPRs.
+ for (unsigned i = 1; i < NumOps; ++i) {
+ Register GPR = MI->getOperand(i);
+ Register Pred = getPredRegFor(GPR);
+ MIB.addReg(Pred.R, 0, Pred.S);
+ }
+ DEBUG(dbgs() << "generated: " << *MIB);
+
+ // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR
+ // with NewGPR.
+ const TargetRegisterClass *RC = MRI->getRegClass(OutR.R);
+ unsigned NewOutR = MRI->createVirtualRegister(RC);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR)
+ .addReg(NewPR.R, 0, NewPR.S);
+ MRI->replaceRegWith(OutR.R, NewOutR);
+ MI->eraseFromParent();
+
+ // If the processed instruction was C2_tfrrp (i.e. Rn = Pm; Pk = Rn),
+ // then the output will be a predicate register. Do not visit the
+ // users of it.
+ if (!isPredReg(NewOutR)) {
+ Register R(NewOutR);
+ PredGPRs.insert(R);
+ processPredicateGPR(R);
+ }
+ return true;
+}
+
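+// A rough illustration of the rewrite performed by convertToPredForm (the
+// register names below are only schematic):
+//   r0 = p0            ; transfer from a predicate register (C2_tfrpr)
+//   r1 = p1            ; transfer from a predicate register (C2_tfrpr)
+//   r2 = and(r0, r1)   ; convertible GPR operation
+// becomes
+//   p2 = and(p0, p1)   ; predicate form of the operation
+//   r3 = p2            ; copy-out that replaces all uses of r2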
+
+bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n");
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ bool Changed = false;
+ VectOfInst Erase;
+
+ // First, replace copies
+ // IntR = PredR1
+ // PredR2 = IntR
+ // with
+ // PredR2 = PredR1
+ // Such sequences can be generated when a copy-into-pred is generated from
+ // a gpr register holding a result of a convertible instruction. After
+ // the convertible instruction is converted, its predicate result will be
+ // copied back into the original gpr.
+
+ for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) {
+ MachineBasicBlock &B = *A;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ if (I->getOpcode() != TargetOpcode::COPY)
+ continue;
+ Register DR = I->getOperand(0);
+ Register SR = I->getOperand(1);
+ if (!TargetRegisterInfo::isVirtualRegister(DR.R))
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(SR.R))
+ continue;
+ if (MRI->getRegClass(DR.R) != PredRC)
+ continue;
+ if (MRI->getRegClass(SR.R) != PredRC)
+ continue;
+ assert(!DR.S && !SR.S && "Unexpected subregister");
+ MRI->replaceRegWith(DR.R, SR.R);
+ Erase.insert(I);
+ Changed = true;
+ }
+ }
+
+ for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return Changed;
+}
+
+
+bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ TRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ PredGPRs.clear();
+ PUsers.clear();
+ G2P.clear();
+
+ bool Changed = false;
+ collectPredicateGPR(MF);
+ for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I)
+ processPredicateGPR(*I);
+
+ bool Again;
+ do {
+ Again = false;
+ VectOfInst Processed, Copy;
+
+ typedef VectOfInst::iterator iterator;
+ Copy = PUsers;
+ for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ bool Done = convertToPredForm(MI);
+ if (Done) {
+ Processed.insert(MI);
+ Again = true;
+ }
+ }
+ Changed |= Again;
+
+ auto Done = [Processed] (MachineInstr *MI) -> bool {
+ return Processed.count(MI);
+ };
+ PUsers.remove_if(Done);
+ } while (Again);
+
+ Changed |= eliminatePredCopies(MF);
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonGenPredicate() {
+ return new HexagonGenPredicate();
+}
+
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 6e9e69f5a2c7..c739afb70c15 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -459,6 +459,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
MachineFunction &MF = DAG.getMachineFunction();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
// Check for varargs.
int NumNamedVarArgParams = -1;
@@ -515,8 +516,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
auto &HRI = *Subtarget.getRegisterInfo();
- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(),
- getPointerTy());
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -574,7 +575,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
if (!isTailCall) {
- SDValue C = DAG.getConstant(NumBytes, dl, getPointerTy(), true);
+ SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
Chain = DAG.getCALLSEQ_START(Chain, C, dl);
}
@@ -615,13 +616,13 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (flag_aligned_memcpy) {
const char *MemcpyName =
"__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
- Callee = DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT);
flag_aligned_memcpy = false;
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
}
// Returns a chain & a flag for retval copy to use.
@@ -811,8 +812,8 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
}
- SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl,
- getPointerTy(), TargetJT);
+ SDValue JumpTableBase = DAG.getNode(
+ HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT);
SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
DAG.getConstant(2, dl, MVT::i32));
SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
@@ -1231,16 +1232,17 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
const HexagonTargetObjectFile *TLOF =
static_cast<const HexagonTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) {
- return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result);
}
- return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
+ return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result);
}
// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
@@ -1261,7 +1263,8 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32);
SDLoc dl(Op);
- return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD);
+ return DAG.getNode(HexagonISD::CONST32_GP, dl,
+ getPointerTy(DAG.getDataLayout()), BA_SD);
}
//===----------------------------------------------------------------------===//
@@ -2254,6 +2257,7 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
// Mark function as containing a call to EH_RETURN.
HexagonMachineFunctionInfo *FuncInfo =
@@ -2262,9 +2266,9 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
unsigned OffsetReg = Hexagon::R28;
- SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- DAG.getRegister(Hexagon::R30, getPointerTy()),
- DAG.getIntPtrConstant(4, dl));
+ SDValue StoreAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
+ DAG.getIntPtrConstant(4, dl));
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
@@ -2338,8 +2342,7 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
std::pair<unsigned, const TargetRegisterClass *>
HexagonTargetLowering::getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, const std::string &Constraint,
- MVT VT) const {
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r': // R0-R31
@@ -2372,8 +2375,8 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
-bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Allows a signed-extended 11-bit immediate field.
if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
@@ -2463,3 +2466,45 @@ bool llvm::isPositiveHalfWord(SDNode *N) {
return true;
}
}
+
+Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Module *M = BB->getParent()->getParent();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ unsigned SZ = Ty->getPrimitiveSizeInBits();
+ assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
+ Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
+ : Intrinsic::hexagon_L4_loadd_locked;
+ Value *Fn = Intrinsic::getDeclaration(M, IntID);
+ return Builder.CreateCall(Fn, Addr, "larx");
+}
+
+/// Perform a store-conditional operation to Addr. Return the status of the
+/// store. This should be 0 if the store succeeded, non-zero otherwise.
+Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
+ Value *Val, Value *Addr, AtomicOrdering Ord) const {
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Module *M = BB->getParent()->getParent();
+ Type *Ty = Val->getType();
+ unsigned SZ = Ty->getPrimitiveSizeInBits();
+ assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
+ Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
+ : Intrinsic::hexagon_S4_stored_locked;
+ Value *Fn = Intrinsic::getDeclaration(M, IntID);
+ Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
+ Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
+ Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
+ return Ext;
+}
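+
+// Illustrative only: with the hooks above and AtomicRMWExpansionKind::LLSC,
+// AtomicExpand rewrites a 32-bit atomicrmw into a loop roughly of the form
+//   loop:
+//     %old    = call i32 @llvm.hexagon.L2.loadw.locked(i32* %p)
+//     %new    = <rmw operation>(%old, %val)
+//     %status = <emitStoreConditional(%new, %p)>   ; 0 means success
+//     %retry  = icmp ne i32 %status, 0
+//     br i1 %retry, label %loop, label %done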
+
+bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ // Do not expand loads and stores that don't exceed 64 bits.
+ return LI->getType()->getPrimitiveSizeInBits() > 64;
+}
+
+bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ // Do not expand loads and stores that don't exceed 64 bits.
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
+}
+
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index b80e8477eb7b..2642abffaddd 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -165,7 +165,8 @@ bool isPositiveHalfWord(SDNode *N);
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- EVT getSetCCResultType(LLVMContext &C, EVT VT) const override {
+ EVT getSetCCResultType(const DataLayout &, LLVMContext &C,
+ EVT VT) const override {
if (!VT.isVector())
return MVT::i1;
else
@@ -179,11 +180,10 @@ bool isPositiveHalfWord(SDNode *N);
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "o")
return InlineAsm::Constraint_o;
else if (ConstraintCode == "v")
@@ -198,8 +198,8 @@ bool isPositiveHalfWord(SDNode *N);
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
/// TODO: Handle pre/postinc as well.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -207,6 +207,21 @@ bool isPositiveHalfWord(SDNode *N);
/// compare a register against the immediate without having to materialize
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
+
+ // Handling of atomic RMW instructions.
+ bool hasLoadLinkedStoreConditional() const override {
+ return true;
+ }
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const override;
+ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) const override;
+ bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI)
+ const override {
+ return AtomicRMWExpansionKind::LLSC;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 8f255a08f534..f6bb4a045438 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -221,7 +221,7 @@ unsigned HexagonRegisterInfo::getRARegister() const {
unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const HexagonFrameLowering *TFI = getFrameLowering(MF);
if (TFI->hasFP(MF))
return Hexagon::R30;
return Hexagon::R29;
@@ -240,7 +240,8 @@ unsigned HexagonRegisterInfo::getStackRegister() const {
bool
HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
- return MF.getSubtarget().getFrameLowering()->hasFP(MF);
+ const HexagonFrameLowering *TFI = getFrameLowering(MF);
+ return TFI->hasFP(MF);
}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index b5db997eb1b8..276cc69eed0f 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -18,12 +18,6 @@ using namespace llvm;
bool llvm::flag_aligned_memcpy;
-HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
-}
-
SDValue
HexagonSelectionDAGInfo::
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index 8ac2e43f9294..80ac5d7bd9e2 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -20,8 +20,6 @@ namespace llvm {
class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit HexagonSelectionDAGInfo(const DataLayout &DL);
- ~HexagonSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index fe6c4f4298b5..cd482b3e3af1 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -74,7 +74,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
: HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(*TM.getDataLayout()), FrameLowering() {
+ FrameLowering() {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index a173a8087832..b50442969a29 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -37,6 +37,18 @@ static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Early expansion of MUX"));
+static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true),
+ cl::Hidden, cl::desc("Generate \"insert\" instructions"));
+
+static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions"));
+
+static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true),
+ cl::Hidden, cl::desc("Generate \"extract\" instructions"));
+
+static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true),
+ cl::Hidden, cl::desc("Enable conversion of arithmetic operations to "
+ "predicate instructions"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
@@ -60,23 +72,23 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
namespace llvm {
- FunctionPass *createHexagonExpandCondsets();
- FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
- FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
- FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
- FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
FunctionPass *createHexagonCFGOptimizer();
-
- FunctionPass *createHexagonSplitConst32AndConst64();
+ FunctionPass *createHexagonCommonGEP();
+ FunctionPass *createHexagonCopyToCombine();
+ FunctionPass *createHexagonExpandCondsets();
FunctionPass *createHexagonExpandPredSpillCode();
- FunctionPass *createHexagonHardwareLoops();
- FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonGenExtract();
+ FunctionPass *createHexagonGenInsert();
+ FunctionPass *createHexagonGenPredicate();
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonNewValueJump();
- FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonPacketizer();
- FunctionPass *createHexagonNewValueJump();
+ FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonSplitConst32AndConst64();
} // end namespace llvm;
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
@@ -122,6 +134,7 @@ public:
return createVLIWMachineSched(C);
}
+ void addIRPasses() override;
bool addInstSelector() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
@@ -134,6 +147,20 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
return new HexagonPassConfig(this, PM);
}
+void HexagonPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+
+ addPass(createAtomicExpandPass(TM));
+ if (!NoOpt) {
+ if (EnableCommGEP)
+ addPass(createHexagonCommonGEP());
+ // Replace certain combinations of shifts and ands with extracts.
+ if (EnableGenExtract)
+ addPass(createHexagonGenExtract());
+ }
+}
+
bool HexagonPassConfig::addInstSelector() {
HexagonTargetMachine &TM = getHexagonTargetMachine();
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
@@ -144,8 +171,13 @@ bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonISelDag(TM, getOptLevel()));
if (!NoOpt) {
+ // Create logical operations on predicate registers.
+ if (EnableGenPred)
+ addPass(createHexagonGenPredicate(), false);
addPass(createHexagonPeephole());
printAndVerify("After hexagon peephole pass");
+ if (EnableGenInsert)
+ addPass(createHexagonGenInsert(), false);
}
return false;
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
index 8259055b3f41..9d288af0214a 100644
--- a/lib/Target/Hexagon/LLVMBuild.txt
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -39,4 +39,5 @@ required_libraries =
SelectionDAG
Support
Target
+ TransformUtils
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 83ce0abd835e..53305d85fd80 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -46,7 +46,7 @@ MCInstrInfo *llvm::createHexagonMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitHexagonMCRegisterInfo(X, Hexagon::R0);
return X;
@@ -54,9 +54,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
}
namespace {
@@ -151,7 +149,8 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index a8f9b52746ad..3f377631c016 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -18,7 +18,6 @@ add_llvm_target(MSP430CodeGen
MSP430RegisterInfo.cpp
MSP430Subtarget.cpp
MSP430TargetMachine.cpp
- MSP430SelectionDAGInfo.cpp
MSP430AsmPrinter.cpp
MSP430MCInstLower.cpp
)
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index be445c56389a..807d1129b5fc 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -37,7 +37,7 @@ static MCInstrInfo *createMSP430MCInstrInfo() {
return X;
}
-static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createMSP430MCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitMSP430MCRegisterInfo(X, MSP430::PC);
return X;
@@ -45,12 +45,11 @@ static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createMSP430MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createMSP430MCSubtargetInfoImpl(TT, CPU, FS);
}
-static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createMSP430MCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 5ce5013d898c..8a01334ee2dd 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -254,10 +254,11 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
AM.Base.Reg = CurDAG->getRegister(0, VT);
}
- Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ?
- CurDAG->getTargetFrameIndex(AM.Base.FrameIndex,
- getTargetLowering()->getPointerTy()) :
- AM.Base.Reg;
+ Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase)
+ ? CurDAG->getTargetFrameIndex(
+ AM.Base.FrameIndex,
+ getTargetLowering()->getPointerTy(CurDAG->getDataLayout()))
+ : AM.Base.Reg;
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(N),
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index bc51741a836f..29bc8b33988a 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -213,7 +213,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
TargetLowering::ConstraintType
-MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
+MSP430TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
@@ -227,8 +227,7 @@ MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
std::pair<unsigned, const TargetRegisterClass *>
MSP430TargetLowering::getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, const std::string &Constraint,
- MVT VT) const {
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
@@ -494,7 +493,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
if (Flags.isByVal()) {
int FI = MFI->CreateFixedObject(Flags.getByValSize(),
VA.getLocMemOffset(), true);
- InVal = DAG.getFrameIndex(FI, getPointerTy());
+ InVal = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
@@ -592,10 +591,10 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, dl,
- getPointerTy(), true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, dl, PtrVT, true), dl);
SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
SmallVector<SDValue, 12> MemOpChains;
@@ -630,12 +629,11 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
assert(VA.isMemLoc());
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, getPointerTy());
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, PtrVT);
- SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- StackPtr,
- DAG.getIntPtrConstant(VA.getLocMemOffset(),
- dl));
+ SDValue PtrOff =
+ DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset(), dl));
SDValue MemOp;
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -700,11 +698,8 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, dl, getPointerTy(),
- true),
- DAG.getConstant(0, dl, getPointerTy(), true),
- InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, dl, PtrVT, true),
+ DAG.getConstant(0, dl, PtrVT, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
@@ -788,30 +783,31 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
// Create the TargetGlobalAddress node, folding in the constant offset.
- SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
- getPointerTy(), Offset);
- return DAG.getNode(MSP430ISD::Wrapper, SDLoc(Op),
- getPointerTy(), Result);
+ SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), PtrVT, Offset);
+ return DAG.getNode(MSP430ISD::Wrapper, SDLoc(Op), PtrVT, Result);
}
SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
- SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT);
- return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+ return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result);
}
SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy());
+ SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT);
- return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+ return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result);
}
static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
@@ -1024,16 +1020,17 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = getDataLayout()->getPointerSize();
+ uint64_t SlotSize = MF.getDataLayout().getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
true);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
- return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+ return DAG.getFrameIndex(ReturnAddrIndex, PtrVT);
}
SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
@@ -1046,21 +1043,21 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(getDataLayout()->getPointerSize(), dl, MVT::i16);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, dl, getPointerTy(),
- FrameAddr, Offset),
+ DAG.getConstant(DAG.getDataLayout().getPointerSize(), dl, MVT::i16);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
+ MachinePointerInfo(), false, false, false, 0);
}
SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -1084,10 +1081,11 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
// Frame index of first vararg argument
- SDValue FrameIndex = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
- getPointerTy());
+ SDValue FrameIndex =
+ DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
// Create a store of the frame index to the location operand
@@ -1099,9 +1097,9 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
SDValue MSP430TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
- return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT),
- getPointerTy(), Result);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), PtrVT, Result);
}
/// getPostIndexedAddressParts - returns true by value, base pointer and
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 80d3ae175fb1..2d63852c185b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -72,7 +72,9 @@ namespace llvm {
explicit MSP430TargetLowering(const TargetMachine &TM,
const MSP430Subtarget &STI);
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i8;
+ }
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -96,11 +98,10 @@ namespace llvm {
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
TargetLowering::ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ getConstraintType(StringRef Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 614467bcd248..2fb82e535e8d 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -37,7 +37,7 @@ MSP430RegisterInfo::MSP430RegisterInfo()
const MCPhysReg*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ const MSP430FrameLowering *TFI = getFrameLowering(*MF);
const Function* F = MF->getFunction();
static const MCPhysReg CalleeSavedRegs[] = {
MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
@@ -73,7 +73,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const MSP430FrameLowering *TFI = getFrameLowering(MF);
// Mark 4 special registers with subregisters as reserved.
Reserved.set(MSP430::PCB);
@@ -109,7 +109,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const MSP430FrameLowering *TFI = getFrameLowering(MF);
DebugLoc dl = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
@@ -156,7 +156,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-
+ const MSP430FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP;
}
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
deleted file mode 100644
index 3897ef684d4d..000000000000
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the MSP430SelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSP430TargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "msp430-selectiondag-info"
-
-MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
-}
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
deleted file mode 100644
index 61a6b19111db..000000000000
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the MSP430 subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H
-#define LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class MSP430TargetMachine;
-
-class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit MSP430SelectionDAGInfo(const DataLayout &DL);
- ~MSP430SelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 6374f41c00ea..6216348e4d71 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -34,5 +34,4 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: MSP430GenSubtargetInfo(TT, CPU, FS), FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(*TM.getDataLayout()) {}
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {}
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 81f6f027d45c..ff2656d26dd2 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -18,8 +18,8 @@
#include "MSP430ISelLowering.h"
#include "MSP430InstrInfo.h"
#include "MSP430RegisterInfo.h"
-#include "MSP430SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -35,7 +35,7 @@ class MSP430Subtarget : public MSP430GenSubtargetInfo {
MSP430FrameLowering FrameLowering;
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
- MSP430SelectionDAGInfo TSInfo;
+ TargetSelectionDAGInfo TSInfo;
public:
/// This constructor initializes the data members to match that
@@ -60,7 +60,7 @@ public:
const MSP430TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override {
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
};
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index f14156dbfa2b..5107d2ae58c3 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -1727,37 +1727,59 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
}
namespace {
-template <unsigned ShiftAmount>
+void emitRX(unsigned Opcode, unsigned DstReg, MCOperand Imm, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(Imm);
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRI(unsigned Opcode, unsigned DstReg, int16_t Imm, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ emitRX(Opcode, DstReg, MCOperand::createImm(Imm), IDLoc, Instructions);
+}
+
+
+void emitRRX(unsigned Opcode, unsigned DstReg, unsigned SrcReg, MCOperand Imm,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(SrcReg));
+ tmpInst.addOperand(Imm);
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRRR(unsigned Opcode, unsigned DstReg, unsigned SrcReg,
+ unsigned SrcReg2, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ emitRRX(Opcode, DstReg, SrcReg, MCOperand::createReg(SrcReg2), IDLoc,
+ Instructions);
+}
+
+void emitRRI(unsigned Opcode, unsigned DstReg, unsigned SrcReg, int16_t Imm,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ emitRRX(Opcode, DstReg, SrcReg, MCOperand::createImm(Imm), IDLoc,
+ Instructions);
+}
+
+template <int16_t ShiftAmount>
void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
- MCInst tmpInst;
- if (ShiftAmount >= 32) {
- tmpInst.setOpcode(Mips::DSLL32);
- tmpInst.addOperand(MCOperand::createReg(RegNo));
- tmpInst.addOperand(MCOperand::createReg(RegNo));
- tmpInst.addOperand(MCOperand::createImm(ShiftAmount - 32));
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
- tmpInst.clear();
- } else if (ShiftAmount > 0) {
- tmpInst.setOpcode(Mips::DSLL);
- tmpInst.addOperand(MCOperand::createReg(RegNo));
- tmpInst.addOperand(MCOperand::createReg(RegNo));
- tmpInst.addOperand(MCOperand::createImm(ShiftAmount));
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
- tmpInst.clear();
- }
+ if (ShiftAmount >= 32)
+ emitRRI(Mips::DSLL32, RegNo, RegNo, ShiftAmount - 32, IDLoc, Instructions);
+ else if (ShiftAmount > 0)
+ emitRRI(Mips::DSLL, RegNo, RegNo, ShiftAmount, IDLoc, Instructions);
+
// There's no need for an ORi if the immediate is 0.
if (Operand.isImm() && Operand.getImm() == 0)
return;
- tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::createReg(RegNo));
- tmpInst.addOperand(MCOperand::createReg(RegNo));
- tmpInst.addOperand(Operand);
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
+ emitRRX(Mips::ORi, RegNo, RegNo, Operand, IDLoc, Instructions);
}
template <unsigned ShiftAmount>
@@ -1818,12 +1840,22 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
return true;
}
+ if (Is32BitImm) {
+ if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) {
+ // Sign extend up to 64-bit so that the predicates match the hardware
+ // behaviour. In particular, isInt<16>(0xffff8000) and similar should be
+ // true.
+ ImmValue = SignExtend64<32>(ImmValue);
+ } else {
+ Error(IDLoc, "instruction requires a 32-bit immediate");
+ return true;
+ }
+ }
+
bool UseSrcReg = false;
if (SrcReg != Mips::NoRegister)
UseSrcReg = true;
- MCInst tmpInst;
-
unsigned TmpReg = DstReg;
if (UseSrcReg && (DstReg == SrcReg)) {
// At this point we need AT to perform the expansions and we exit if it is
@@ -1834,29 +1866,26 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
TmpReg = ATReg;
}
- tmpInst.setLoc(IDLoc);
// FIXME: gas has a special case for values that are 000...1111, which
// becomes a li -1 and then a dsrl
- if (0 <= ImmValue && ImmValue <= 65535) {
- // For unsigned and positive signed 16-bit values (0 <= j <= 65535):
- // li d,j => ori d,$zero,j
- if (!UseSrcReg)
- SrcReg = isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
- tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
- tmpInst.addOperand(MCOperand::createReg(SrcReg));
- tmpInst.addOperand(MCOperand::createImm(ImmValue));
- Instructions.push_back(tmpInst);
- } else if (ImmValue < 0 && ImmValue >= -32768) {
- // For negative signed 16-bit values (-32768 <= j < 0):
+ if (isInt<16>(ImmValue)) {
// li d,j => addiu d,$zero,j
if (!UseSrcReg)
SrcReg = Mips::ZERO;
- tmpInst.setOpcode(Mips::ADDiu);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
- tmpInst.addOperand(MCOperand::createReg(SrcReg));
- tmpInst.addOperand(MCOperand::createImm(ImmValue));
- Instructions.push_back(tmpInst);
+ emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions);
+ } else if (isUInt<16>(ImmValue)) {
+ // li d,j => ori d,$zero,j
+ unsigned TmpReg = DstReg;
+ if (SrcReg == DstReg) {
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ TmpReg = ATReg;
+ }
+
+ emitRRI(Mips::ORi, TmpReg, Mips::ZERO, ImmValue, IDLoc, Instructions);
+ if (UseSrcReg)
+ emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
} else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) {
warnIfNoMacro(IDLoc);
@@ -1869,30 +1898,16 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
if (!Is32BitImm && !isInt<32>(ImmValue)) {
// For DLI, expand to an ORi instead of a LUi to avoid sign-extending the
// upper 32 bits.
- tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createReg(Mips::ZERO));
- tmpInst.addOperand(MCOperand::createImm(Bits31To16));
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
- // Move the value to the upper 16 bits by doing a 16-bit left shift.
- createLShiftOri<16>(0, TmpReg, IDLoc, Instructions);
- } else {
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createImm(Bits31To16));
- Instructions.push_back(tmpInst);
- }
+ emitRRI(Mips::ORi, TmpReg, Mips::ZERO, Bits31To16, IDLoc, Instructions);
+ emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, Instructions);
+ } else
+ emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions);
createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions);
if (UseSrcReg)
createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
} else if ((ImmValue & (0xffffLL << 48)) == 0) {
- if (Is32BitImm) {
- Error(IDLoc, "instruction requires a 32-bit immediate");
- return true;
- }
warnIfNoMacro(IDLoc);
// <------- lo32 ------>
@@ -1912,10 +1927,7 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
uint16_t Bits15To0 = ImmValue & 0xffff;
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createImm(Bits47To32));
- Instructions.push_back(tmpInst);
+ emitRI(Mips::LUi, TmpReg, Bits47To32, IDLoc, Instructions);
createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions);
createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
@@ -1923,10 +1935,6 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
} else {
- if (Is32BitImm) {
- Error(IDLoc, "instruction requires a 32-bit immediate");
- return true;
- }
warnIfNoMacro(IDLoc);
// <------- hi32 ------> <------- lo32 ------>
@@ -1948,10 +1956,7 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
uint16_t Bits15To0 = ImmValue & 0xffff;
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createImm(Bits63To48));
- Instructions.push_back(tmpInst);
+ emitRI(Mips::LUi, TmpReg, Bits63To48, IDLoc, Instructions);
createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions);
// When Bits31To16 is 0, do a left shift of 32 bits instead of doing
@@ -2096,8 +2101,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
tmpInst.addOperand(MCOperand::createExpr(HiExpr));
Instructions.push_back(tmpInst);
- createLShiftOri<0>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(),
- Instructions);
+ emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), SMLoc(),
+ Instructions);
}
if (UseSrcReg)
@@ -2708,12 +2713,8 @@ void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
unsigned TrgReg, bool Is64Bit,
SmallVectorImpl<MCInst> &Instructions) {
- MCInst AdduInst;
- AdduInst.setOpcode(Is64Bit ? Mips::DADDu : Mips::ADDu);
- AdduInst.addOperand(MCOperand::createReg(DstReg));
- AdduInst.addOperand(MCOperand::createReg(SrcReg));
- AdduInst.addOperand(MCOperand::createReg(TrgReg));
- Instructions.push_back(AdduInst);
+ emitRRR(Is64Bit ? Mips::DADDu : Mips::ADDu, DstReg, SrcReg, TrgReg, SMLoc(),
+ Instructions);
}
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 36ba8e559e0b..bde843afd3d2 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -46,7 +46,6 @@ add_llvm_target(MipsCodeGen
MipsSubtarget.cpp
MipsTargetMachine.cpp
MipsTargetObjectFile.cpp
- MipsSelectionDAGInfo.cpp
)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 9bdf8235a2b4..949ee1474f96 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -59,7 +59,7 @@ static MCInstrInfo *createMipsMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createMipsMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitMipsMCRegisterInfo(X, Mips::RA);
return X;
@@ -68,9 +68,7 @@ static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *createMipsMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
CPU = MIPS_MC::selectMipsCPU(TT, CPU);
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitMipsMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createMipsMCSubtargetInfoImpl(TT, CPU, FS);
}
static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI,
@@ -84,7 +82,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createMipsMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index db2a924a99f9..46cc99c62393 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -152,18 +152,19 @@ Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return isInt<15>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
}
-void Mips16FrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+void Mips16FrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const Mips16InstrInfo &TII =
*static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
const MipsRegisterInfo &RI = TII.getRegisterInfo();
const BitVector Reserved = RI.getReservedRegs(MF);
bool SaveS2 = Reserved[Mips::S2];
if (SaveS2)
- MF.getRegInfo().setPhysRegUsed(Mips::S2);
+ SavedRegs.set(Mips::S2);
if (hasFP(MF))
- MF.getRegInfo().setPhysRegUsed(Mips::S0);
+ SavedRegs.set(Mips::S0);
}
const MipsFrameLowering *
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index f281c927c1c4..b48ed4641ea7 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -38,8 +38,8 @@ public:
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 7b6a2a154471..bce2c1eb4485 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -120,13 +120,13 @@ void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
unsigned Mips16SPAliasReg =
MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
- return CurDAG->getRegister(Mips16SPAliasReg,
- getTargetLowering()->getPointerTy());
+ auto PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout());
+ return CurDAG->getRegister(Mips16SPAliasReg, PtrVT);
}
void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
- SDValue AliasFPReg = CurDAG->getRegister(Mips::S0,
- getTargetLowering()->getPointerTy());
+ auto PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout());
+ SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, PtrVT);
if (Parent) {
switch (Parent->getOpcode()) {
case ISD::LOAD: {
@@ -155,7 +155,7 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
}
}
}
- AliasReg = CurDAG->getRegister(Mips::SP, getTargetLowering()->getPointerTy());
+ AliasReg = CurDAG->getRegister(Mips::SP, PtrVT);
return;
}
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 846e3c964f44..3522cbb1f36a 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -502,7 +502,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
unsigned V0Reg = Mips::V0;
if (NeedMips16Helper) {
RegsToPass.push_front(std::make_pair(V0Reg, Callee));
- JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction,
+ getPointerTy(DAG.getDataLayout()));
ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG,
MipsII::MO_GOT, Chain,
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index c2651b82d285..e2f6fcc17726 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -267,7 +267,7 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
}
unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
- assert(TLI.getValueType(AI->getType(), true) == MVT::i32 &&
+ assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 &&
"Alloca should always return a pointer.");
DenseMap<const AllocaInst *, int>::iterator SI =
@@ -382,7 +382,7 @@ unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
@@ -507,12 +507,13 @@ bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
break;
case Instruction::IntToPtr:
// Look past no-op inttoptrs if its operand is in the same BB.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if its operand is in the same BB.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
}
@@ -532,7 +533,7 @@ bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
}
bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
- EVT evt = TLI.getValueType(Ty, true);
+ EVT evt = TLI.getValueType(DL, Ty, true);
// Only handle simple types.
if (evt == MVT::Other || !evt.isSimple())
return false;
@@ -931,8 +932,8 @@ bool MipsFastISel::selectFPExt(const Instruction *I) {
if (UnsupportedFPMode)
return false;
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f32 || DestVT != MVT::f64)
return false;
@@ -998,8 +999,8 @@ bool MipsFastISel::selectFPTrunc(const Instruction *I) {
if (UnsupportedFPMode)
return false;
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f64 || DestVT != MVT::f32)
return false;
@@ -1415,7 +1416,8 @@ bool MipsFastISel::selectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
MipsCCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs,
@@ -1449,7 +1451,7 @@ bool MipsFastISel::selectRet(const Instruction *I) {
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
return false;
- EVT RVEVT = TLI.getValueType(RV->getType());
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple())
return false;
@@ -1493,8 +1495,8 @@ bool MipsFastISel::selectTrunc(const Instruction *I) {
Value *Op = I->getOperand(0);
EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(Op->getType(), true);
- DestVT = TLI.getValueType(I->getType(), true);
+ SrcVT = TLI.getValueType(DL, Op->getType(), true);
+ DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
@@ -1521,8 +1523,8 @@ bool MipsFastISel::selectIntExt(const Instruction *I) {
return false;
EVT SrcEVT, DestEVT;
- SrcEVT = TLI.getValueType(SrcTy, true);
- DestEVT = TLI.getValueType(DestTy, true);
+ SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
@@ -1620,7 +1622,7 @@ unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
}
bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) {
- EVT DestEVT = TLI.getValueType(I->getType(), true);
+ EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
if (!DestEVT.isSimple())
return false;
@@ -1685,7 +1687,7 @@ bool MipsFastISel::selectShift(const Instruction *I) {
if (!TempReg)
return false;
- MVT Op0MVT = TLI.getValueType(Op0->getType(), true).getSimpleVT();
+ MVT Op0MVT = TLI.getValueType(DL, Op0->getType(), true).getSimpleVT();
bool IsZExt = Opcode == Instruction::LShr;
if (!emitIntExt(Op0MVT, Op0Reg, MVT::i32, TempReg, IsZExt))
return false;
@@ -1803,7 +1805,7 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
unsigned VReg = getRegForValue(V);
if (VReg == 0)
return 0;
- MVT VMVT = TLI.getValueType(V->getType(), true).getSimpleVT();
+ MVT VMVT = TLI.getValueType(DL, V->getType(), true).getSimpleVT();
if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) {
unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned))
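
Note: the MipsFastISel hunks apply the same rule at the IR level: getValueType() needs the DataLayout as its first argument, ahead of the Type. A small helper showing the check pattern used throughout; the helper itself is illustrative, only the getValueType call mirrors the patch:

    // Type legality probe with the DataLayout-aware getValueType.
    static bool isSimpleLegalType(const TargetLowering &TLI,
                                  const DataLayout &DL, Type *Ty, MVT &VT) {
      EVT Evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
      if (Evt == MVT::Other || !Evt.isSimple())
        return false;
      VT = Evt.getSimpleVT();
      return TLI.isTypeLegal(VT);
    }
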
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 2c9868ac051d..06502397b6b8 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -59,8 +59,9 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
/// GOT address into a register.
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg();
- return CurDAG->getRegister(GlobalBaseReg,
- getTargetLowering()->getPointerTy()).getNode();
+ return CurDAG->getRegister(GlobalBaseReg, getTargetLowering()->getPointerTy(
+ CurDAG->getDataLayout()))
+ .getNode();
}
/// ComplexPattern used on MipsInstrInfo
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 67ddcc4dacb9..fbebb9abb4cc 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -466,7 +466,8 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return Mips::createFastISel(funcInfo, libInfo);
}
-EVT MipsTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT MipsTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
@@ -1579,9 +1580,10 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Table = Op.getOperand(1);
SDValue Index = Op.getOperand(2);
SDLoc DL(Op);
- EVT PTy = getPointerTy();
+ auto &TD = DAG.getDataLayout();
+ EVT PTy = getPointerTy(TD);
unsigned EntrySize =
- DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout());
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
Index = DAG.getNode(ISD::MUL, DL, PTy, Index,
DAG.getConstant(EntrySize, DL, PTy));
@@ -1647,10 +1649,10 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
SDLoc DL(Op);
EVT Ty = Op.getOperand(0).getValueType();
- SDValue Cond = DAG.getNode(ISD::SETCC, DL,
- getSetCCResultType(*DAG.getContext(), Ty),
- Op.getOperand(0), Op.getOperand(1),
- Op.getOperand(4));
+ SDValue Cond =
+ DAG.getNode(ISD::SETCC, DL, getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), Ty),
+ Op.getOperand(0), Op.getOperand(1), Op.getOperand(4));
return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2),
Op.getOperand(3));
@@ -1723,7 +1725,7 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
@@ -1831,7 +1833,7 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
- getPointerTy());
+ getPointerTy(MF.getDataLayout()));
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
@@ -1850,9 +1852,9 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Node);
unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4;
- SDValue VAListLoad = DAG.getLoad(getPointerTy(), DL, Chain, VAListPtr,
- MachinePointerInfo(SV), false, false, false,
- 0);
+ SDValue VAListLoad =
+ DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, VAListPtr,
+ MachinePointerInfo(SV), false, false, false, 0);
SDValue VAList = VAListLoad;
// Re-align the pointer if necessary.
@@ -1874,7 +1876,9 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
}
// Increment the pointer, VAList, to the next vaarg.
- unsigned ArgSizeInBytes = getDataLayout()->getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
+ auto &TD = DAG.getDataLayout();
+ unsigned ArgSizeInBytes =
+ TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes,
ArgSlotSizeInBytes),
@@ -2062,7 +2066,7 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
DAG.getRegister(OffsetReg, Ty),
- DAG.getRegister(AddrReg, getPointerTy()),
+ DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())),
Chain.getValue(1));
}
@@ -2479,15 +2483,16 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
SDValue Chain, SDValue Arg, SDLoc DL,
bool IsTailCall, SelectionDAG &DAG) const {
if (!IsTailCall) {
- SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr,
- DAG.getIntPtrConstant(Offset, DL));
+ SDValue PtrOff =
+ DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr,
+ DAG.getIntPtrConstant(Offset, DL));
return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false,
false, 0);
}
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(),
/*isVolatile=*/ true, false, 0);
}
@@ -2611,8 +2616,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL);
- SDValue StackPtr = DAG.getCopyFromReg(
- Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, getPointerTy());
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP,
+ getPointerTy(DAG.getDataLayout()));
// With EABI is it possible to have 16 args on registers.
std::deque< std::pair<unsigned, SDValue> > RegsToPass;
@@ -2750,7 +2756,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
IsCallReloc = true;
}
} else
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL,
+ getPointerTy(DAG.getDataLayout()), 0,
MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
@@ -2758,8 +2765,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const char *Sym = S->getSymbol();
if (!ABI.IsN64() && !IsPIC) // !N64 && static
- Callee =
- DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG);
+ Callee = DAG.getTargetExternalSymbol(
+ Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG);
else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
@@ -3029,7 +3036,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -3174,12 +3181,13 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
- SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ SDValue Val =
+ DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout()));
unsigned V0 = ABI.IsN64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(V0, getPointerTy()));
+ RetOps.push_back(DAG.getRegister(V0, getPointerTy(DAG.getDataLayout())));
}
RetOps[0] = Chain; // Update chain.
@@ -3198,9 +3206,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
-MipsTargetLowering::ConstraintType MipsTargetLowering::
-getConstraintType(const std::string &Constraint) const
-{
+MipsTargetLowering::ConstraintType
+MipsTargetLowering::getConstraintType(StringRef Constraint) const {
// Mips specific constraints
// GCC config/mips/constraints.md
//
@@ -3290,9 +3297,8 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag
/// that is returned indicates whether parsing was successful. The second flag
/// is true if the numeric part exists.
-static std::pair<bool, bool>
-parsePhysicalReg(StringRef C, std::string &Prefix,
- unsigned long long &Reg) {
+static std::pair<bool, bool> parsePhysicalReg(StringRef C, StringRef &Prefix,
+ unsigned long long &Reg) {
if (C.front() != '{' || C.back() != '}')
return std::make_pair(false, false);
@@ -3300,7 +3306,7 @@ parsePhysicalReg(StringRef C, std::string &Prefix,
StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1;
I = std::find_if(B, E, std::ptr_fun(isdigit));
- Prefix.assign(B, I - B);
+ Prefix = StringRef(B, I - B);
// The second flag is set to false if no numeric characters were found.
if (I == E)
@@ -3316,7 +3322,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
const TargetRegisterInfo *TRI =
Subtarget.getRegisterInfo();
const TargetRegisterClass *RC;
- std::string Prefix;
+ StringRef Prefix;
unsigned long long Reg;
std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg);
@@ -3332,7 +3338,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
RC = TRI->getRegClass(Prefix == "hi" ?
Mips::HI32RegClassID : Mips::LO32RegClassID);
return std::make_pair(*(RC->begin()), RC);
- } else if (Prefix.compare(0, 4, "$msa") == 0) {
+ } else if (Prefix.startswith("$msa")) {
// Parse $msa(ir|csr|access|save|modify|request|map|unmap)
// No numeric characters follow the name.
@@ -3390,7 +3396,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
/// pointer.
std::pair<unsigned, const TargetRegisterClass *>
MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -3546,8 +3552,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
@@ -3625,7 +3631,7 @@ void MipsTargetLowering::copyByValRegs(
FrameObjOffset = VA.getLocMemOffset();
// Create frame object.
- EVT PtrTy = getPointerTy();
+ EVT PtrTy = getPointerTy(DAG.getDataLayout());
int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
InVals.push_back(FIN);
@@ -3662,7 +3668,8 @@ void MipsTargetLowering::passByValArg(
unsigned OffsetInBytes = 0; // From beginning of struct
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
- EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
+ EVT PtrTy = getPointerTy(DAG.getDataLayout()),
+ RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
unsigned NumRegs = LastReg - FirstReg;
if (NumRegs) {
@@ -3787,7 +3794,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
- SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
MachinePointerInfo(), false, false, 0);
cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(
@@ -3920,8 +3927,8 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned MipsTargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
// Named registers is expected to be fairly rare. For now, just support $28
// since the linux kernel uses it.
if (Subtarget.isGP64bit()) {
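
Note: the inline-asm changes above replace const std::string& with StringRef, so constraint handling can slice and compare without allocating; parsePhysicalReg now fills a StringRef prefix and startswith() replaces compare(0, 4, ...). A sketch of splitting a "{name<number>}" constraint with the same StringRef calls, simplified to require a numeric suffix (the function name is illustrative):

    // Split "{name<number>}"-style constraints into a prefix and a number.
    static bool splitRegConstraint(StringRef C, StringRef &Prefix,
                                   unsigned long long &Reg) {
      if (C.size() < 3 || C.front() != '{' || C.back() != '}')
        return false;
      StringRef Body = C.slice(1, C.size() - 1);      // drop the braces
      size_t FirstDigit = Body.find_first_of("0123456789");
      if (FirstDigit == StringRef::npos)
        return false;
      Prefix = Body.substr(0, FirstDigit);
      // getAsInteger returns true on failure, hence the negation.
      return !Body.substr(FirstDigit).getAsInteger(10, Reg);
    }
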
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index bc9a1ce64097..6fe8f830d35d 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -227,7 +227,9 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -247,7 +249,8 @@ namespace llvm {
const char *getTargetNodeName(unsigned Opcode) const override;
/// getSetCCResultType - get the ISD::SETCC result ValueType
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
@@ -263,7 +266,8 @@ namespace llvm {
void HandleByVal(CCState *, unsigned &, unsigned) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
protected:
SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
@@ -478,8 +482,7 @@ namespace llvm {
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
// Inline asm support
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -493,8 +496,7 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -505,8 +507,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "R")
return InlineAsm::Constraint_R;
else if (ConstraintCode == "ZC")
@@ -514,8 +516,8 @@ namespace llvm {
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index ec7bf314c641..096b3bee5d07 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -621,10 +621,17 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
!MFI->hasVarSizedObjects();
}
-void MipsSEFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
+/// Mark \p Reg and all registers aliasing it in the bitset.
+void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ SavedRegs.set(*AI);
+}
+
+void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MipsABIInfo ABI = STI.getABI();
unsigned FP = ABI.GetFramePtr();
@@ -632,10 +639,10 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Mark $fp as used if function has dedicated frame pointer.
if (hasFP(MF))
- MRI.setPhysRegUsed(FP);
+ setAliasRegs(MF, SavedRegs, FP);
// Mark $s7 as used if function has dedicated base pointer.
if (hasBP(MF))
- MRI.setPhysRegUsed(BP);
+ setAliasRegs(MF, SavedRegs, BP);
// Create spill slots for eh data registers if function calls eh_return.
if (MipsFI->callsEhReturn())
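
Note: setAliasRegs above marks not just the requested register but every register that overlaps it, so marking, say, a 32-bit register also covers any wider register it is part of; missing the aliases is the classic way to lose a callee-saved spill. A compact sketch of the same MCRegAliasIterator pattern (function name is illustrative):

    // Mark a register and everything that overlaps it as callee-saved.
    static void markWithAliases(const MachineFunction &MF,
                                BitVector &SavedRegs, unsigned Reg) {
      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
      for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid();
           ++AI)
        SavedRegs.set(*AI);
    }
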
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 2fcd6bbb9a15..9cb32e6c7829 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -34,8 +34,8 @@ public:
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
unsigned ehDataReg(unsigned I) const;
};
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 990a2f8d8c85..cb46d731da29 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -841,7 +841,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
}
case MipsISD::ThreadPointer: {
- EVT PtrVT = getTargetLowering()->getPointerTy();
+ EVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout());
unsigned RdhwrOpc, DestReg;
if (PtrVT == MVT::i32) {
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index ae2837a8582c..b319fd07884b 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -838,8 +838,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (!VT.isVector())
- return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N),
- VT, TL->getScalarShiftAmountTy(VT), DAG);
+ return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), VT,
+ TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
+ DAG);
return SDValue(N, 0);
}
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
deleted file mode 100644
index edd8f670707f..000000000000
--- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the MipsSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MipsTargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "mips-selectiondag-info"
-
-MipsSelectionDAGInfo::MipsSelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
-}
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
deleted file mode 100644
index 061423fbeb86..000000000000
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Mips subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H
-#define LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class MipsTargetMachine;
-
-class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit MipsSelectionDAGInfo(const DataLayout &DL);
- ~MipsSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index c41bb16a58ea..471b6e19a8bb 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -70,7 +70,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
- HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(*TM.getDataLayout()),
+ HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(),
InstrInfo(
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 5f9296812e1c..1db8881404c9 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -18,10 +18,10 @@
#include "MipsFrameLowering.h"
#include "MipsISelLowering.h"
#include "MipsInstrInfo.h"
-#include "MipsSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -140,7 +140,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
Triple TargetTriple;
- const MipsSelectionDAGInfo TSInfo;
+ const TargetSelectionDAGInfo TSInfo;
std::unique_ptr<const MipsInstrInfo> InstrInfo;
std::unique_ptr<const MipsFrameLowering> FrameLowering;
std::unique_ptr<const MipsTargetLowering> TLInfo;
@@ -275,7 +275,7 @@ public:
void setHelperClassesMips16();
void setHelperClassesMipsSE();
- const MipsSelectionDAGInfo *getSelectionDAGInfo() const override {
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
const MipsInstrInfo *getInstrInfo() const override { return InstrInfo.get(); }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index c820668befa0..1c77745d130b 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -62,7 +62,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
if (!ABI.IsN64())
Ret += "-p:32:32";
- // 8 and 16 bit integers only need no have natural alignment, but try to
+ // 8 and 16 bit integers only need to have natural alignment, but try to
// align them to 32 bits. 64 bit integers have natural alignment.
Ret += "-i8:8:32-i16:16:32-i64:64";
@@ -237,7 +237,7 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
if (Subtarget->allowMixed16_32()) {
DEBUG(errs() << "No Target Transform Info Pass Added\n");
// FIXME: This is no longer necessary as the TTI returned is per-function.
- return TargetTransformInfo(getDataLayout());
+ return TargetTransformInfo(F.getParent()->getDataLayout());
}
DEBUG(errs() << "Target Transform Info Pass Added\n");
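
Note: the TargetIRAnalysis change follows the same theme: the authoritative DataLayout now lives on the IR Module, so the fallback TargetTransformInfo is built from F.getParent()->getDataLayout() rather than asked of the TargetMachine. A sketch of the surrounding callback, assuming the usual getTargetIRAnalysis structure (class name is a placeholder):

    // Fallback TTI constructed from the module's DataLayout.
    TargetIRAnalysis MyTargetMachine::getTargetIRAnalysis() {
      return TargetIRAnalysis([](const Function &F) {
        return TargetTransformInfo(F.getParent()->getDataLayout());
      });
    }
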
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 221d2f093aeb..ad7302037cad 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -37,7 +37,7 @@ static MCInstrInfo *createNVPTXMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
// PTX does not have a return address register.
InitNVPTXMCRegisterInfo(X, 0);
@@ -46,13 +46,13 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createNVPTXMCSubtargetInfoImpl(TT, CPU, FS);
}
-static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
- StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// The default relocation model is used regardless of what the client has
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index cadd7a46cd9d..ecb0f0a1d0a1 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -340,7 +340,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Type *Ty = F->getReturnType();
@@ -366,20 +366,20 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
O << ".param .b" << size << " func_retval0";
} else if (isa<PointerType>(Ty)) {
- O << ".param .b" << TLI->getPointerTy().getSizeInBits()
+ O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
<< " func_retval0";
} else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
- unsigned totalsz = TD->getTypeAllocSize(Ty);
+ unsigned totalsz = DL.getTypeAllocSize(Ty);
unsigned retAlignment = 0;
if (!llvm::getAlign(*F, 0, retAlignment))
- retAlignment = TD->getABITypeAlignment(Ty);
+ retAlignment = DL.getABITypeAlignment(Ty);
O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
<< "]";
} else
llvm_unreachable("Unknown return type");
} else {
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, Ty, vtparts);
+ ComputeValueVTs(*TLI, DL, Ty, vtparts);
unsigned idx = 0;
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
@@ -1433,7 +1433,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
- MVT thePointerTy = TLI->getPointerTy();
+ MVT thePointerTy = TLI->getPointerTy(*TD);
O << "(\n";
@@ -1579,7 +1579,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Further, if a part is vector, print the above for
// each vector element.
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, ETy, vtparts);
+ ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts);
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 09e0bd5d3d88..b75cf4040312 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -80,14 +80,14 @@ static bool IsPTXVectorType(MVT VT) {
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
-static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
- SmallVectorImpl<EVT> &ValueVTs,
+static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<uint64_t> *Offsets = nullptr,
uint64_t StartingOffset = 0) {
SmallVector<EVT, 16> TempVTs;
SmallVector<uint64_t, 16> TempOffsets;
- ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
+ ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
uint64_t Off = TempOffsets[i];
@@ -885,15 +885,16 @@ SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
- return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
-std::string
-NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- unsigned retAlignment,
- const ImmutableCallSite *CS) const {
+std::string NVPTXTargetLowering::getPrototype(
+ const DataLayout &DL, Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
+ const ImmutableCallSite *CS) const {
+ auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
@@ -921,13 +922,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
- O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ O << ".param .b" << PtrVT.getSizeInBits() << " _";
} else if ((retTy->getTypeID() == Type::StructTyID) ||
isa<VectorType>(retTy)) {
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << getDataLayout()->getTypeAllocSize(retTy) << "]";
+ auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
+ O << ".param .align " << retAlignment << " .b8 _["
+ << DL.getTypeAllocSize(retTy) << "]";
} else {
llvm_unreachable("Unknown return type");
}
@@ -936,7 +936,6 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
O << "_ (";
bool first = true;
- MVT thePointerTy = getPointerTy();
unsigned OIdx = 0;
for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
@@ -950,24 +949,23 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
- const DataLayout *TD = getDataLayout();
// +1 because index 0 is reserved for return type alignment
if (!llvm::getAlign(*CallI, i + 1, align))
- align = TD->getABITypeAlignment(Ty);
- unsigned sz = TD->getTypeAllocSize(Ty);
+ align = DL.getABITypeAlignment(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
// update the index for Outs
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, Ty, vtparts);
+ ComputeValueVTs(*this, DL, Ty, vtparts);
if (unsigned len = vtparts.size())
OIdx += len - 1;
continue;
}
// i8 types in IR will be i16 types in SDAG
- assert((getValueType(Ty) == Outs[OIdx].VT ||
- (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
+ assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
+ (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
"type mismatch between callee prototype and arguments");
// scalar type
unsigned sz = 0;
@@ -976,7 +974,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
if (sz < 32)
sz = 32;
} else if (isa<PointerType>(Ty))
- sz = thePointerTy.getSizeInBits();
+ sz = PtrVT.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
O << ".param .b" << sz << " ";
@@ -988,7 +986,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
Type *ETy = PTy->getElementType();
unsigned align = Outs[OIdx].Flags.getByValAlign();
- unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
+ unsigned sz = DL.getTypeAllocSize(ETy);
O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
@@ -1002,7 +1000,6 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
const ImmutableCallSite *CS,
Type *Ty,
unsigned Idx) const {
- const DataLayout *TD = getDataLayout();
unsigned Align = 0;
const Value *DirectCallee = CS->getCalledFunction();
@@ -1043,7 +1040,8 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
// Call is indirect or alignment information is not available, fall back to
// the ABI type alignment
- return TD->getABITypeAlignment(Ty);
+ auto &DL = CS->getCaller()->getParent()->getDataLayout();
+ return DL.getABITypeAlignment(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -1064,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
- const DataLayout *TD = getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
+ auto &DL = MF.getDataLayout();
SDValue tempChain = Chain;
Chain = DAG.getCALLSEQ_START(Chain,
@@ -1096,11 +1094,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// aggregate
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
+ 0);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
MVT::i32),
@@ -1137,10 +1136,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
DAG.getConstant(align, dl, MVT::i32),
@@ -1321,7 +1320,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<uint64_t, 16> Offsets;
const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
+ vtparts, &Offsets, 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
@@ -1342,9 +1342,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT elemtype = vtparts[j];
int curOffset = Offsets[j];
unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
- DAG.getConstant(curOffset, dl, getPointerTy()));
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
+ DAG.getConstant(curOffset, dl, PtrVT));
SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
MachinePointerInfo(), false, false, false,
PartAlign);
@@ -1371,12 +1371,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle Result
if (Ins.size() > 0) {
SmallVector<EVT, 16> resvtparts;
- ComputeValueVTs(*this, retTy, resvtparts);
+ ComputeValueVTs(*this, DL, retTy, resvtparts);
// Declare
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
- unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
+ unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
// Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
// these three types to match the logic in
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
@@ -1415,7 +1415,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
+ std::string Proto =
+ getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1477,7 +1478,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
if (retTy && retTy->isVectorTy()) {
- EVT ObjectVT = getValueType(retTy);
+ EVT ObjectVT = getValueType(DL, retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
@@ -1590,13 +1591,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
InVals.push_back(Elt);
}
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
}
} else {
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
@@ -1608,8 +1609,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<EVT, 4> LoadRetVTs;
EVT TheLoadType = VTs[i];
- if (retTy->isIntegerTy() &&
- TD->getTypeAllocSizeInBits(retTy) < 32) {
+ if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
// This is for integer types only, and specifically not for
// aggregates.
LoadRetVTs.push_back(MVT::i32);
@@ -1920,11 +1920,11 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
}
MemSDNode *MemSD = cast<MemSDNode>(N);
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
unsigned Align = MemSD->getAlignment();
unsigned PrefAlign =
- TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
+ TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
if (Align < PrefAlign) {
// This store is not sufficiently aligned, so bail out and let this vector
// store be scalarized. Note that we may still be able to emit smaller
@@ -2064,7 +2064,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- const DataLayout *TD = getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
@@ -2118,7 +2119,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
- ComputePTXValueVTs(*this, Ty, vtparts);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
@@ -2130,7 +2131,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
for (unsigned parti = 0; parti < NumRegs; ++parti) {
InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
@@ -2156,13 +2157,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// NOTE: Here, we lose the ability to issue vector loads for vectors
// that are a part of a struct. This should be investigated in the
// future.
- ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
+ 0);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
bool aggregateIsPacked = false;
if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
aggregateIsPacked = STy->isPacked();
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
EVT partVT = vtparts[parti];
@@ -2170,12 +2172,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
PointerType::get(partVT.getTypeForEVT(F->getContext()),
llvm::ADDRESS_SPACE_PARAM));
SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(offsets[parti], dl, getPointerTy()));
- unsigned partAlign =
- aggregateIsPacked ? 1
- : TD->getABITypeAlignment(
- partVT.getTypeForEVT(F->getContext()));
+ DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(offsets[parti], dl, PtrVT));
+ unsigned partAlign = aggregateIsPacked
+ ? 1
+ : DL.getABITypeAlignment(
+ partVT.getTypeForEVT(F->getContext()));
SDValue p;
if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
@@ -2198,8 +2200,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ EVT ObjectVT = getValueType(DL, Ty);
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
unsigned NumElts = ObjectVT.getVectorNumElements();
assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
@@ -2212,9 +2214,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Value *SrcValue = Constant::getNullValue(PointerType::get(
EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue P = DAG.getLoad(
- EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
- false, true,
- TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+ EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
+ true,
+ DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2229,9 +2231,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Value *SrcValue = Constant::getNullValue(PointerType::get(
VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue P = DAG.getLoad(
- VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
- false, true,
- TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
+ VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
+ true,
+ DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2269,13 +2271,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Value *SrcValue = Constant::getNullValue(
PointerType::get(VecVT.getTypeForEVT(F->getContext()),
llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, dl, getPointerTy()));
+ SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(Ofst, dl, PtrVT));
SDValue P = DAG.getLoad(
VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
false, true,
- TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
+ DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2288,7 +2289,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
InVals.push_back(Elt);
}
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
InsIdx += NumElts;
}
@@ -2298,23 +2299,24 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
continue;
}
// A plain scalar.
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
// If ABI, load from the param symbol
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
Value *srcValue = Constant::getNullValue(PointerType::get(
ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue p;
if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
ISD::SEXTLOAD : ISD::ZEXTLOAD;
- p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
- MachinePointerInfo(srcValue), ObjectVT, false, false,
- false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ p = DAG.getExtLoad(
+ ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
+ ObjectVT, false, false, false,
+ DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
} else {
- p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
- MachinePointerInfo(srcValue), false, false, false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ p = DAG.getLoad(
+ Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false,
+ false, false,
+ DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
}
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
@@ -2329,10 +2331,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// machine instruction fails because TargetExternalSymbol
// (not lowered) is target dependent, and CopyToReg assumes
// the source is lowered.
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
assert(ObjectVT == Ins[InsIdx].VT &&
"Ins type did not match function type");
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
@@ -2370,7 +2372,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
Type *RetTy = F->getReturnType();
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
@@ -2384,7 +2386,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(NumElts == Outs.size() && "Bad scalarization of return value");
// const_cast can be removed in later LLVM versions
- EVT EltVT = getValueType(RetTy).getVectorElementType();
+ EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
bool NeedExtend = false;
if (EltVT.getSizeInBits() < 16)
NeedExtend = true;
@@ -2435,7 +2437,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
EVT VecVT =
EVT::getVectorVT(F->getContext(), EltVT, VecSize);
unsigned PerStoreOffset =
- TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
for (unsigned i = 0; i < NumElts; i += VecSize) {
// Get values
@@ -2493,7 +2495,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
} else {
SmallVector<EVT, 16> ValVTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
@@ -2509,8 +2511,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
TheValType.getVectorElementType(), TmpVal,
DAG.getIntPtrConstant(j, dl));
EVT TheStoreType = ValVTs[i];
- if (RetTy->isIntegerTy() &&
- TD->getTypeAllocSizeInBits(RetTy) < 32) {
+ if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
// The following zero-extension is for integer types only, and
// specifically not for aggregates.
TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
@@ -3291,14 +3292,14 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
case Intrinsic::nvvm_ldu_global_p: {
-
+ auto &DL = I.getModule()->getDataLayout();
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
- Info.memVT = getPointerTy();
+ Info.memVT = getPointerTy(DL);
else
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
@@ -3311,14 +3312,15 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p: {
+ auto &DL = I.getModule()->getDataLayout();
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
- Info.memVT = getPointerTy();
+ Info.memVT = getPointerTy(DL);
else
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
@@ -3731,8 +3733,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
-bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// AddrMode - This represents an addressing mode of:
@@ -3772,7 +3774,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
-NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
+NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
@@ -3794,7 +3796,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -4251,7 +4253,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
- const DataLayout *TD,
SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
SDLoc DL(N);
@@ -4282,8 +4283,9 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LoadSDNode *LD = cast<LoadSDNode>(N);
unsigned Align = LD->getAlignment();
+ auto &TD = DAG.getDataLayout();
unsigned PrefAlign =
- TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
+ TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
if (Align < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
@@ -4495,7 +4497,7 @@ void NVPTXTargetLowering::ReplaceNodeResults(
default:
report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
- ReplaceLoadVector(N, DAG, getDataLayout(), Results);
+ ReplaceLoadVector(N, DAG, Results);
return;
case ISD::INTRINSIC_W_CHAIN:
ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index ed94775b3002..e5c37321a33b 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -456,24 +456,23 @@ public:
/// Used to guide target specific optimizations, like loop strength
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
- EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override {
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
+ EVT VT) const override {
if (VT.isVector())
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
SDValue LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -483,7 +482,7 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- std::string getPrototype(Type *, const ArgListTy &,
+ std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
unsigned retAlignment,
const ImmutableCallSite *CS) const;
@@ -501,7 +500,9 @@ public:
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 6ab0fadf9a35..0bf72febc4a0 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -57,7 +57,6 @@ char NVPTXLowerAggrCopies::ID = 0;
// Lower MemTransferInst or load-store pair to loop
static void convertTransferToLoop(
Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
- //unsigned numLoads,
bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
Type *indType = len->getType();
@@ -84,6 +83,8 @@ static void convertTransferToLoop(
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
// load from srcAddr+ind
+ // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
+ // word-sized loads and stores.
Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
srcVolatile);
// store at dstAddr+ind
@@ -137,13 +138,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
//
// Collect all the aggrLoads, aggrMemcpys and aggrMemsets.
//
- //const BasicBlock *firstBB = &F.front(); // first BB in F
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- //BasicBlock *bb = BI;
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
++II) {
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
-
if (!load->hasOneUse())
continue;
@@ -152,7 +150,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
User *use = load->user_back();
if (StoreInst *store = dyn_cast<StoreInst>(use)) {
- if (store->getOperand(0) != load) //getValueOperand
+ if (store->getOperand(0) != load)
continue;
aggrLoads.push_back(load);
}
@@ -188,8 +186,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
//
// Do the transformation of an aggr load/copy/set to a loop
//
- for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
- LoadInst *load = aggrLoads[i];
+ for (LoadInst *load : aggrLoads) {
StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
Value *srcAddr = load->getOperand(0);
Value *dstAddr = store->getOperand(1);
@@ -203,20 +200,19 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
load->eraseFromParent();
}
- for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
- MemTransferInst *cpy = aggrMemcpys[i];
- Value *len = cpy->getLength();
- // llvm 2.7 version of memcpy does not have volatile
- // operand yet. So always making it non-volatile
- // optimistically, so that we don't see unnecessary
- // st.volatile in ptx
- convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
- false, Context, F);
+ for (MemTransferInst *cpy : aggrMemcpys) {
+ convertTransferToLoop(/* splitAt */ cpy,
+ /* srcAddr */ cpy->getSource(),
+ /* dstAddr */ cpy->getDest(),
+ /* len */ cpy->getLength(),
+ /* srcVolatile */ cpy->isVolatile(),
+ /* dstVolatile */ cpy->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
cpy->eraseFromParent();
}
- for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
- MemSetInst *memsetinst = aggrMemsets[i];
+ for (MemSetInst *memsetinst : aggrMemsets) {
Value *len = memsetinst->getLength();
Value *val = memsetinst->getValue();
convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index e83f735a551e..5a83371b07f1 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -2,7 +2,7 @@
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illinois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
@@ -115,7 +115,7 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
replaceImageHandle(Handle, MF);
- return true;
+ return true;
}
return false;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 71645dca69c5..bd2509a3c8c9 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -48,7 +48,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)),
- TSInfo(TM.getDataLayout()), FrameLowering() {}
+ FrameLowering() {}
bool NVPTXSubtarget::hasImageHandles() const {
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 9d9072efc382..248f9e117d83 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -148,8 +148,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); });
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(NVPTXTTIImpl(this, F));
+ });
}
void NVPTXPassConfig::addIRPasses() {
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index dc81802f4b5a..e7250cdba5ac 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -94,7 +94,7 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost(
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -117,3 +117,15 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo);
}
}
+
+void NVPTXTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ BaseT::getUnrollingPreferences(L, UP);
+
+ // Enable partial unrolling and runtime unrolling, but reduce the
+ // threshold: small loops are often unrolled by the PTX-to-SASS compiler
+ // anyway, and unrolling them earlier in LLVM can be beneficial.
+ UP.Partial = UP.Runtime = true;
+ UP.PartialThreshold = UP.Threshold / 4;
+}
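As a rough, standalone sketch of what the new NVPTX unrolling hook amounts to (not LLVM code; the base threshold of 150 below is only an assumed illustrative value, since the real one is whatever BaseT::getUnrollingPreferences fills in):

#include <cstdio>

int main() {
  // Assumed base value; in LLVM it comes from BaseT::getUnrollingPreferences.
  unsigned Threshold = 150;
  // The override enables both partial and runtime unrolling...
  bool Partial = true, Runtime = true;
  // ...but lowers the partial-unroll threshold so only small loop bodies are
  // unrolled before ptxas gets a chance to unroll them itself.
  unsigned PartialThreshold = Threshold / 4;
  std::printf("Partial=%d Runtime=%d PartialThreshold=%u\n", Partial, Runtime,
              PartialThreshold);
  return 0;
}

With the assumed base of 150 this yields a partial-unroll threshold of 37.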
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 4280888988f9..5bcd1e27a558 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -37,8 +37,9 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
const NVPTXTargetLowering *getTLI() const { return TLI; };
public:
- explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM)
- : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+ explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
NVPTXTTIImpl(const NVPTXTTIImpl &Arg)
@@ -46,18 +47,6 @@ public:
NVPTXTTIImpl(NVPTXTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
bool hasBranchDivergence() { return true; }
@@ -69,6 +58,8 @@ public:
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index fe168a547597..c0c83cc258b8 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -33,7 +33,6 @@ add_llvm_target(PowerPCCodeGen
PPCTargetObjectFile.cpp
PPCTargetTransformInfo.cpp
PPCTOCRegDeps.cpp
- PPCSelectionDAGInfo.cpp
PPCTLSDynamicCall.cpp
PPCVSXCopy.cpp
PPCVSXFMAMutate.cpp
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5c38fe173d96..30f232a9a91e 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -51,10 +51,9 @@ static MCInstrInfo *createPPCMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
- Triple TheTriple(TT);
- bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
- TheTriple.getArch() == Triple::ppc64le);
+static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) {
+ bool isPPC64 =
+ (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
unsigned Flavour = isPPC64 ? 0 : 1;
unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
@@ -65,9 +64,7 @@ static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitPPCMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createPPCMCSubtargetInfoImpl(TT, CPU, FS);
}
static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
@@ -90,22 +87,20 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createPPCMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
- Triple T(TT);
- if (T.isOSDarwin())
+ if (TT.isOSDarwin())
RM = Reloc::DynamicNoPIC;
else
RM = Reloc::Static;
}
if (CM == CodeModel::Default) {
- Triple T(TT);
- if (!T.isOSDarwin() &&
- (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le))
+ if (!TT.isOSDarwin() &&
+ (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
CM = CodeModel::Medium;
}
X->initMCCodeGenInfo(RM, CM, OL);
@@ -231,7 +226,7 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
static MCTargetStreamer *
createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
- if (TT.getObjectFormat() == Triple::ELF)
+ if (TT.isOSBinFormatELF())
return new PPCTargetELFStreamer(S);
return new PPCTargetMachOStreamer(S);
}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 87a5236e711f..199a0debf88b 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -197,7 +197,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// External or weakly linked global variables need non-lazily-resolved stubs
if (TM.getRelocationModel() != Reloc::Static &&
- (GV->isDeclaration() || GV->isWeakForLinker())) {
+ !GV->isStrongDefinitionForLinker()) {
if (!GV->hasHiddenVisibility()) {
SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -369,28 +369,70 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
- EncodedBytes = 6*4;
+ EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));
+ ++EncodedBytes;
+ // Save the current TOC pointer before the remote call.
+ int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
+ .addReg(PPC::X2)
+ .addImm(TOCSaveOffset)
+ .addReg(PPC::X1));
+ ++EncodedBytes;
+
+ // If we're on ELFv1, then we need to load the actual function pointer from
+ // the function descriptor.
+ if (!Subtarget->isELFv2ABI()) {
+ // Load the new TOC pointer and the function address, but not r11
+ // (needing r11 is rare, and loading it here would prevent passing it
+ // via a 'nest' parameter).
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addImm(8)
+ .addReg(ScratchReg));
+ ++EncodedBytes;
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(ScratchReg)
+ .addImm(0)
+ .addReg(ScratchReg));
+ ++EncodedBytes;
+ }
+
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
+ ++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
+ ++EncodedBytes;
+
+ // Restore the TOC pointer after the call.
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addImm(TOCSaveOffset)
+ .addReg(PPC::X1));
+ ++EncodedBytes;
}
+ // Each instruction is 4 bytes.
+ EncodedBytes *= 4;
+
// Emit padding.
unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
assert(NumBytes >= EncodedBytes &&
@@ -624,7 +666,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
IsExternal = GV->isDeclaration();
IsCommon = GV->hasCommonLinkage();
IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- (GV->isDeclaration() || GV->isWeakForLinker());
+ !GV->isStrongDefinitionForLinker();
IsAvailExt = GV->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
@@ -706,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- (GV->isDeclaration() || GV->isWeakForLinker());
+ !GV->isStrongDefinitionForLinker();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 416131745806..baadf081a64c 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -351,8 +351,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
Opcode = ISD::FTRUNC; break;
}
- MVT VTy =
- TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+ auto &DL = CI->getModule()->getDataLayout();
+ MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
+ true);
if (VTy == MVT::Other)
return true;
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 874a6fce0656..5bc9124f8085 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -133,6 +133,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// register having an odd register number.
CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
// The first 8 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index fafcd76f9d18..5f236f744fc4 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -262,7 +262,7 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
// fast-isel, and return its equivalent machine type in VT.
// FIXME: Copied directly from ARM -- factor into base class?
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
- EVT Evt = TLI.getValueType(Ty, true);
+ EVT Evt = TLI.getValueType(DL, Ty, true);
// Only handle simple types.
if (Evt == MVT::Other || !Evt.isSimple()) return false;
@@ -324,12 +324,13 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
return PPCComputeAddress(U->getOperand(0), Addr);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return PPCComputeAddress(U->getOperand(0), Addr);
break;
case Instruction::GetElementPtr: {
@@ -799,7 +800,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
bool IsZExt, unsigned DestReg) {
Type *Ty = SrcValue1->getType();
- EVT SrcEVT = TLI.getValueType(Ty, true);
+ EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
@@ -893,8 +894,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
// Attempt to fast-select a floating-point extend instruction.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f32 || DestVT != MVT::f64)
return false;
@@ -911,8 +912,8 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) {
// Attempt to fast-select a floating-point truncate instruction.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::f64 || DestVT != MVT::f32)
return false;
@@ -992,7 +993,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
if (!SrcEVT.isSimple())
return false;
@@ -1157,7 +1158,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
// We can get here in the case when we have a binary operation on a non-legal
// type and the target independent selector doesn't know how to handle it.
@@ -1594,7 +1595,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -1641,7 +1642,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVEVT = TLI.getValueType(RV->getType());
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple())
return false;
MVT RVVT = RVEVT.getSimpleVT();
@@ -1769,8 +1770,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
- EVT DestVT = TLI.getValueType(I->getType(), true);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
return false;
@@ -1806,8 +1807,8 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
- SrcEVT = TLI.getValueType(SrcTy, true);
- DestEVT = TLI.getValueType(DestTy, true);
+ SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
@@ -1979,7 +1980,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// on the "if" path here.
if (CModel == CodeModel::Large ||
(GV->getType()->getElementType()->isFunctionTy() &&
- (GV->isDeclaration() || GV->isWeakForLinker())) ||
+ !GV->isStrongDefinitionForLinker()) ||
GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage())
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
@@ -2127,7 +2128,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple()) return 0;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index b4008e4a886a..87229d80d9c1 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UsedRegMask = 0;
for (unsigned i = 0; i != 32; ++i)
- if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ if (MRI.isPhysRegModified(VRRegNo[i]))
UsedRegMask |= 1 << (31-i);
// Live in and live out values already must be in the mask, so don't bother
@@ -1158,9 +1159,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void
-PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *) const {
+void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
const PPCRegisterInfo *RegInfo =
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
@@ -1168,8 +1171,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
FI->setMustSaveLR(MustSaveLR(MF, LR));
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.setPhysRegUnused(LR);
+ SavedRegs.reset(LR);
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
@@ -1214,9 +1216,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
// function uses CR 2, 3, or 4.
if (!isPPC64 && !isDarwinABI &&
- (MRI.isPhysRegUsed(PPC::CR2) ||
- MRI.isPhysRegUsed(PPC::CR3) ||
- MRI.isPhysRegUsed(PPC::CR4))) {
+ (SavedRegs.test(PPC::CR2) ||
+ SavedRegs.test(PPC::CR3) ||
+ SavedRegs.test(PPC::CR4))) {
int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
FI->setCRSpillFrameIndex(FrameIdx);
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 28d074ecd79d..d6a389bfbf0d 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -45,8 +45,8 @@ public:
bool needsFP(const MachineFunction &MF) const;
void replaceFPWithRealFP(MachineFunction &MF) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = nullptr) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index c85c2610d2f5..01a3acb742e6 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -102,7 +102,8 @@ namespace {
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) {
- return CurDAG->getTargetConstant(Imm, dl, PPCLowering->getPointerTy());
+ return CurDAG->getTargetConstant(
+ Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
}
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
@@ -313,7 +314,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
const Module *M = MF->getFunction()->getParent();
DebugLoc dl;
- if (PPCLowering->getPointerTy() == MVT::i32) {
+ if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
if (PPCSubTarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
if (M->getPICLevel() == PICLevel::Small) {
@@ -342,7 +343,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
}
}
return CurDAG->getRegister(GlobalBaseReg,
- PPCLowering->getPointerTy()).getNode();
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()))
+ .getNode();
}
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
@@ -2205,7 +2207,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
- EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT =
+ CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
if (!PPCSubTarget->useCRBits() &&
@@ -2468,10 +2471,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops));
+ return transferMemOperands(
+ N, CurDAG->getMachineNode(
+ Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
+ Ops));
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -2506,10 +2510,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
- return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops));
+ return transferMemOperands(
+ N, CurDAG->getMachineNode(
+ Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
+ Ops));
}
}
@@ -2662,7 +2667,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
- EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT =
+ CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
// If this is a select of i1 operands, we'll pattern match it.
@@ -2901,7 +2907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
if ((GValue->getType()->getElementType()->isFunctionTy() &&
- (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
+ !GValue->isStrongDefinitionForLinker()) ||
GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage())
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
@@ -2915,7 +2921,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Generate a PIC-safe GOT reference.
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
- return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32);
+ return CurDAG->SelectNodeTo(
+ N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()),
+ MVT::i32);
}
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
@@ -3398,9 +3406,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
bool IsModified;
do {
IsModified = false;
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ for (SDNode &Node : CurDAG->allnodes()) {
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
if (!MachineNode || MachineNode->use_empty())
continue;
SDNode *ResNode = MachineNode;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 594472bbb47b..0ed9b051ffed 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -952,7 +952,8 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
-unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
// Darwin passes everything on 4 byte boundary.
if (Subtarget.isDarwin())
return 4;
@@ -1055,7 +1056,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
-EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
+ EVT VT) const {
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
@@ -1101,7 +1103,7 @@ static bool isConstantOrUndef(int Op, int Val) {
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -1132,7 +1134,7 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -1174,7 +1176,7 @@ bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
if (!Subtarget.hasP8Vector())
return false;
- bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -1237,7 +1239,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
@@ -1262,7 +1264,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
@@ -1352,7 +1354,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getDataLayout().isLittleEndian()) {
unsigned indexOffset = CheckEven ? 4 : 0;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
@@ -1399,7 +1401,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
- bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
@@ -1456,7 +1458,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (DAG.getDataLayout().isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
@@ -1796,7 +1798,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
}
- Disp = DAG.getTargetConstant(0, dl, getPointerTy());
+ Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
@@ -2084,7 +2086,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool is64bit = Subtarget.isPPC64();
const Module *M = DAG.getMachineFunction().getFunction()->getParent();
PICLevel::Level picLevel = M->getPICLevel();
@@ -2270,7 +2272,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
@@ -2399,11 +2401,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl(Op);
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
- Type *IntPtrTy =
- DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
- *DAG.getContext());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -2440,7 +2440,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
@@ -2476,8 +2476,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
PtrVT);
@@ -2797,7 +2796,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
@@ -3023,7 +3022,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
assert(!(CallConv == CallingConv::Fast && isVarArg) &&
"fastcc not supported on varargs functions");
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
@@ -3059,12 +3058,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned NumBytes = LinkageSize;
unsigned AvailableFPRs = Num_FPR_Regs;
unsigned AvailableVRs = Num_VR_Regs;
- for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (Ins[i].Flags.isNest())
+ continue;
+
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
NumBytes, AvailableFPRs, AvailableVRs,
Subtarget.hasQPX()))
HasParameterArea = true;
+ }
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
@@ -3216,6 +3219,17 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ if (Flags.isNest()) {
+ // The 'nest' parameter, if any, is passed in R11.
+ unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ break;
+ }
+
// These can be scalar arguments or elements of an integer array type
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
@@ -3425,7 +3439,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -3845,7 +3859,8 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
return nullptr; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
- DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+ DAG.getTargetLoweringInfo().getPointerTy(
+ DAG.getDataLayout())).getNode();
}
namespace {
@@ -3991,7 +4006,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
SDLoc dl) {
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
if (!isTailCall) {
if (isVector) {
SDValue StackPtr;
@@ -4053,7 +4068,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
- bool isTailCall, bool IsPatchPoint,
+ bool isTailCall, bool IsPatchPoint, bool hasNest,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
@@ -4062,7 +4077,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
@@ -4084,8 +4099,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
(Subtarget.getTargetTriple().isMacOSX() &&
Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
- (G->getGlobal()->isDeclaration() ||
- G->getGlobal()->isWeakForLinker())) ||
+ !G->getGlobal()->isStrongDefinitionForLinker()) ||
(Subtarget.isTargetELF() && !isPPC64 &&
!G->getGlobal()->hasLocalLinkage() &&
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
@@ -4196,11 +4210,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Chain = TOCVal.getValue(0);
InFlag = TOCVal.getValue(1);
- SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
- InFlag);
+ // If the function call has an explicit 'nest' parameter, it takes the
+ // place of the environment pointer.
+ if (!hasNest) {
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
- Chain = EnvVal.getValue(0);
- InFlag = EnvVal.getValue(1);
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+ }
MTCTROps[0] = Chain;
MTCTROps[1] = LoadFuncPtr;
@@ -4218,7 +4236,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64 && !isELFv2ABI)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -4254,8 +4272,7 @@ static
bool isLocalCall(const SDValue &Callee)
{
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return !G->getGlobal()->isDeclaration() &&
- !G->getGlobal()->isWeakForLinker();
+ return G->getGlobal()->isStrongDefinitionForLinker();
return false;
}
@@ -4308,7 +4325,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
bool isTailCall, bool isVarArg, bool IsPatchPoint,
- SelectionDAG &DAG,
+ bool hasNest, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
SDValue InFlag, SDValue Chain,
@@ -4321,8 +4338,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
- SPDiff, isTailCall, IsPatchPoint, RegsToPass,
- Ops, NodeTys, CS, Subtarget);
+ SPDiff, isTailCall, IsPatchPoint, hasNest,
+ RegsToPass, Ops, NodeTys, CS, Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
@@ -4381,7 +4398,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// allocated and an unnecessary move instruction being generated.
CallOpc = PPCISD::BCTRL_LOAD_TOC;
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
@@ -4586,7 +4603,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
unsigned LocMemOffset = ByValVA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
+ StackPtr, PtrOff);
// Create a copy of the argument in the local area of the current
// stack frame.
@@ -4623,7 +4641,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
if (!isTailCall) {
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
+ StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(),
@@ -4664,7 +4683,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
NumBytes, Ins, InVals, CS);
}
@@ -4703,8 +4723,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
+ bool hasNest = false;
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
@@ -4758,6 +4779,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
+ if (Flags.isNest())
+ continue;
+
if (CallConv == CallingConv::Fast) {
if (Flags.isByVal())
NumGPRsUsed += (Flags.getByValSize()+7)/8;
@@ -5021,6 +5045,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ if (Flags.isNest()) {
+ // The 'nest' parameter, if any, is passed in R11.
+ RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
+ hasNest = true;
+ break;
+ }
+
// These can be scalar arguments or elements of an integer array type
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
@@ -5302,9 +5333,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CS);
}
SDValue
@@ -5320,7 +5351,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
unsigned NumOps = Outs.size();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
bool isPPC64 = PtrVT == MVT::i64;
unsigned PtrByteSize = isPPC64 ? 8 : 4;
@@ -5693,7 +5724,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
NumBytes, Ins, InVals, CS);
}
@@ -5764,7 +5796,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
// Get the correct type for pointers.
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
// Construct the stack pointer operand.
bool isPPC64 = Subtarget.isPPC64();
@@ -5794,7 +5826,7 @@ SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
@@ -5817,7 +5849,7 @@ SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
@@ -5845,7 +5877,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDLoc dl(Op);
// Get the correct type for pointers.
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
DAG.getConstant(0, dl, PtrVT), Size);
@@ -5888,8 +5920,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue BasePtr = LD->getBasePtr();
MachineMemOperand *MMO = LD->getMemOperand();
- SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
- BasePtr, MVT::i8, MMO);
+ SDValue NewLD =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
+ BasePtr, MVT::i8, MMO);
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
@@ -5913,7 +5946,8 @@ SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Value = ST->getValue();
MachineMemOperand *MMO = ST->getMemOperand();
- Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
+ Value);
return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
@@ -6374,7 +6408,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
SINT.getOperand(0).getValueType() == MVT::i32) {
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -6419,7 +6453,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
SDValue Ld;
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
@@ -6506,7 +6540,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
// Save FP Control Word to register
EVT NodeTys[] = {
@@ -6727,7 +6761,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
assert(BVN->getNumOperands() == 4 &&
@@ -6760,9 +6794,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
- 16 /* alignment */);
-
+ SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
+ 16 /* alignment */);
+
SmallVector<SDValue, 2> Ops;
Ops.push_back(DAG.getEntryNode());
Ops.push_back(CPIdx);
@@ -7453,7 +7487,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
// Store the input value into Value#0 of the stack slot.
@@ -7499,7 +7533,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue StoreChain = DAG.getEntryNode();
@@ -7651,9 +7685,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Ex =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
- DAG.getConstant(Idx, dl, getVectorIdxTy()));
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
SDValue Store;
if (ScalarVT != ScalarMemVT)
Store =
@@ -7715,7 +7749,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SmallVector<SDValue, 2> Ops;
@@ -7920,7 +7954,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i1 &&
"Unexpected result type for CTR decrement intrinsic");
- EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
+ EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ N->getValueType(0));
SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
N->getOperand(1));
@@ -8248,7 +8283,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
@@ -8386,7 +8421,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -9032,6 +9067,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+static std::string getRecipOp(const char *Base, EVT VT) {
+ std::string RecipOp(Base);
+ if (VT.getScalarType() == MVT::f64)
+ RecipOp += "d";
+ else
+ RecipOp += "f";
+
+ if (VT.isVector())
+ RecipOp = "vec-" + RecipOp;
+
+ return RecipOp;
+}
+
SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
@@ -9043,13 +9091,12 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- // Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. For both FRE and FRSQRTE, the minimum
- // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
- // 2^-14. IEEE float has 23 digits and double has 52 digits.
- RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
- if (VT.getScalarType() == MVT::f64)
- ++RefinementSteps;
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("sqrt", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
@@ -9066,13 +9113,12 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- // Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. For both FRE and FRSQRTE, the minimum
- // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
- // 2^-14. IEEE float has 23 digits and double has 52 digits.
- RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
- if (VT.getScalarType() == MVT::f64)
- ++RefinementSteps;
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("div", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
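
(Aside, not part of the patch: a minimal standalone sketch of the key-building
logic in getRecipOp() above, written against plain strings so the mapping can
be checked in isolation; the resulting keys are the same names passed to
setDefaults() in PPCTargetMachine.cpp later in this commit.)

    // Illustration only; mirrors getRecipOp() above.
    #include <cassert>
    #include <string>
    static std::string buildRecipKey(const char *Base, bool IsDouble, bool IsVector) {
      std::string Key(Base);
      Key += IsDouble ? "d" : "f";   // scalar element type suffix
      if (IsVector)
        Key = "vec-" + Key;          // vector types get a "vec-" prefix
      return Key;
    }
    int main() {
      assert(buildRecipKey("sqrt", false, false) == "sqrtf");
      assert(buildRecipKey("div",  true,  true)  == "vec-divd");
    }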
@@ -9854,7 +9900,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
"Invalid extension type");
- EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
SDValue ShiftCst =
DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
@@ -10145,9 +10191,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
- unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
+ unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
@@ -10219,7 +10265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
2*MemVT.getStoreSize()-1);
// Create the new base load.
- SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy());
+ SDValue LDXIntID =
+ DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue BaseLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
@@ -10243,7 +10290,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (!findConsecutiveLoad(LD, DAG))
--IncValue;
- SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy());
+ SDValue Increment =
+ DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
MachineMemOperand *ExtraMMO =
@@ -10691,7 +10739,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
-PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+PPCTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
@@ -10776,7 +10824,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
@@ -10923,8 +10971,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
-bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// PPC does not allow r+i addressing modes for vectors!
if (Ty->isVectorTy() && AM.BaseOffs != 0)
@@ -10977,22 +11025,22 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = Subtarget.isPPC64();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
isPPC64 ? MVT::i64 : MVT::i32);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, dl, getPointerTy(),
- FrameAddr, Offset),
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
+ MachinePointerInfo(), false, false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -11000,13 +11048,13 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- bool isPPC64 = PtrVT == MVT::i64;
-
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+ bool isPPC64 = PtrVT == MVT::i64;
+
// Naked functions never have a frame pointer, and so we use r1. For all
// other functions, this decision must be delayed until during PEI.
unsigned FrameReg;
@@ -11026,8 +11074,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 02242b512a4f..6e13533cfdb3 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -423,7 +423,9 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
bool isCheapToSpeculateCttz() const override {
return true;
@@ -434,7 +436,8 @@ namespace llvm {
}
/// getSetCCResultType - Return the ISD::SETCC ValueType
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
     /// Return true if target always benefits from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
@@ -487,7 +490,8 @@ namespace llvm {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
@@ -519,8 +523,7 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const;
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -529,13 +532,13 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(Type *Ty) const override;
+ unsigned getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
@@ -544,8 +547,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "es")
return InlineAsm::Constraint_es;
else if (ConstraintCode == "o")
@@ -561,8 +564,8 @@ namespace llvm {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
@@ -745,7 +748,7 @@ namespace llvm {
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
- bool isVarArg, bool IsPatchPoint,
+ bool isVarArg, bool IsPatchPoint, bool hasNest,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 696a83860e53..bf6e40296405 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
cl::Hidden);
+static cl::opt<bool>
+UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
+ cl::desc("Use the old (incorrect) instruction latency calculation"));
+
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
@@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return new ScoreboardHazardRecognizer(II, DAG);
}
+unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (!ItinData || UseOldLatencyCalc)
+ return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
+
+ // The default implementation of getInstrLatency calls getStageLatency, but
+ // getStageLatency does not do the right thing for us. While we do have an
+ // itinerary, most cores are fully pipelined, and so the itineraries only
+ // express the first part of the pipeline, not every stage. Instead, we need
+ // to use the listed output operand cycle number (using operand 0 here, which
+ // is an output).
+
+ unsigned Latency = 1;
+ unsigned DefClass = MI->getDesc().getSchedClass();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
+ continue;
+
+ int Cycle = ItinData->getOperandCycle(DefClass, i);
+ if (Cycle < 0)
+ continue;
+
+ Latency = std::max(Latency, (unsigned) Cycle);
+ }
+
+ return Latency;
+}
int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
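
(Aside, not part of the patch: why getInstrLatency() above reads the listed
output-operand cycle rather than the stage latency. The numbers are taken from
the IIC_FPAddSub itinerary entries added to PPCScheduleP7.td/P8.td later in
this commit.)

    // Illustration only, using the P7 entry added below:
    //   InstrItinData<IIC_FPAddSub, [...issue stages...], [5, 1, 1]>
    // The itinerary only models the front of the (fully pipelined) unit, so
    // getStageLatency() would report just the couple of cycles of those stages.
    // getInstrLatency() above instead scans the register defs and takes
    //   Latency = max(1, getOperandCycle(DefClass, /*operand*/ 0)) = 5,
    // i.e. the cycle at which the result (operand 0) is actually available.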
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index e2d6346aa532..40badae644d6 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -95,6 +95,10 @@ public:
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = nullptr) const override;
+
int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 43ba4994fde6..20c95fe888e0 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -989,6 +989,18 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
(XVDIVDP $A, $B)>;
+// Reciprocal estimate
+def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
+ (XVRESP $A)>;
+def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
+ (XVREDP $A)>;
+
+// Recip. square root estimate
+def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
+ (XVRSQRTESP $A)>;
+def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
+ (XVRSQRTEDP $A)>;
+
} // AddedComplexity
} // HasVSX
@@ -1013,6 +1025,9 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
v4i32:$XB)))]>;
} // isCommutable
+ def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
+ (XXLEQV $A, $B)>;
+
def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 656376c641aa..2b09b2f625de 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -165,8 +165,7 @@ void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
@@ -209,7 +208,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- if (PPCFI->needsFP(MF))
+ if (TFI->needsFP(MF))
Reserved.set(PPC::X31);
if (hasBasePointer(MF))
@@ -230,7 +229,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
- if (PPCFI->needsFP(MF))
+ if (TFI->needsFP(MF))
Reserved.set(PPC::R31);
if (hasBasePointer(MF)) {
@@ -256,8 +255,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
const unsigned DefaultSafety = 1;
switch (RC->getID()) {
@@ -341,7 +339,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = Subtarget.getFrameLowering()->getStackAlignment();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+ unsigned TargetAlign = TFI->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -864,8 +863,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
if (!TM.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
@@ -908,10 +906,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
@@ -946,11 +944,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
- unsigned StackEst =
- PPCFI->determineFrameLayout(MF, false, true);
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+ unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
// If we likely don't need a stack frame, then we probably don't need a
// virtual base register either.
@@ -1034,4 +1029,3 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
MI->getOpcode() == TargetOpcode::PATCHPOINT ||
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
}
-
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
index 635d154d10bf..267f56726180 100644
--- a/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_VS1, P7_VS2]>],
[5, 1, 1]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_VS1, P7_VS2]>],
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td
index 020739baec3a..69e6d05c6604 100644
--- a/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FPU1, P8_FPU2]>],
[5, 1, 1]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1]>,
InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FPU1, P8_FPU2]>],
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
deleted file mode 100644
index dc1674214769..000000000000
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PPCSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPCTargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "powerpc-selectiondag-info"
-
-PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
- : TargetSelectionDAGInfo(DL) {}
-
-PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
deleted file mode 100644
index 2c1378d5670d..000000000000
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
-#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class PPCTargetMachine;
-
-class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit PPCSelectionDAGInfo(const DataLayout *DL);
- ~PPCSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index cf603fe17723..58daccae90f2 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -53,7 +53,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le),
TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- InstrInfo(*this), TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {}
+ InstrInfo(*this), TLInfo(TM, *this) {}
void PPCSubtarget::initializeEnvironment() {
StackAlignment = 16;
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index e9cc3d4bd5bc..0616c1f65604 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -17,10 +17,10 @@
#include "PPCFrameLowering.h"
#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
-#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -129,7 +129,7 @@ protected:
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
PPCTargetLowering TLInfo;
- PPCSelectionDAGInfo TSInfo;
+ TargetSelectionDAGInfo TSInfo;
public:
/// This constructor initializes the data members to match that
@@ -164,7 +164,7 @@ public:
const PPCTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const PPCSelectionDAGInfo *getSelectionDAGInfo() const override {
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
const PPCRegisterInfo *getRegisterInfo() const override {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 074bc870751a..1daf244fed44 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -172,7 +172,26 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(getTargetTriple())),
- TargetABI(computeTargetABI(TT, Options)) {
+ TargetABI(computeTargetABI(TT, Options)),
+ Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
+
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+ unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+ RefinementSteps64 = RefinementSteps + 1;
+
+ this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
+
+ this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
+
initAsmInfo();
}
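
(Aside, not part of the patch: the arithmetic behind the defaults above,
assuming the relative error roughly squares, i.e. the number of accurate bits
doubles, on each Newton-Raphson refinement step.)

    // Illustration only.
    //   Without hasRecipPrec(): 5 -> 10 -> 20 -> 40 accurate bits
    //     float  (23 fraction bits): 3 steps         (RefinementSteps   = 3)
    //     double (52 fraction bits): 40 < 52, so 4   (RefinementSteps64 = 4)
    //   With hasRecipPrec():    14 -> 28 -> 56 accurate bits
    //     float:  1 step                              (RefinementSteps   = 1)
    //     double: 2 steps                             (RefinementSteps64 = 2)
    // In both cases the double-precision variants need exactly one extra
    // step, hence RefinementSteps64 = RefinementSteps + 1.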
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 5c0f7e629a69..6496339519a1 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,6 +29,8 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
+ PPCSubtarget Subtarget;
+
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 25d563a7d975..e21c2b77f4d7 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -317,7 +317,7 @@ unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 35e7a1497c83..368bef93f0dd 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -38,7 +38,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
public:
explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
PPCTTIImpl(const PPCTTIImpl &Arg)
@@ -46,18 +47,6 @@ public:
PPCTTIImpl(PPCTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- PPCTTIImpl &operator=(const PPCTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- PPCTTIImpl &operator=(PPCTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index f352fa647ace..58d3c3d3fa2e 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -136,6 +136,16 @@ protected:
// source of the copy, it must still be live here. We can't use
// interval testing for a physical register, so as long as we're
// walking the MIs we may as well test liveness here.
+ //
+ // FIXME: There is a case that occurs in practice, like this:
+ // %vreg9<def> = COPY %F1; VSSRC:%vreg9
+ // ...
+ // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9
+ // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9
+ // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC:
+ // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC:
+ // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC:
+ // which prevents an otherwise-profitable transformation.
bool OtherUsers = false, KillsAddendSrc = false;
for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
J != JE; --J) {
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index e7ab71ac2106..3fb1dcc3d4af 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -80,6 +80,7 @@ struct PPCVSXSwapEntry {
unsigned int IsSwap : 1;
unsigned int MentionsPhysVR : 1;
unsigned int IsSwappable : 1;
+ unsigned int MentionsPartialVR : 1;
unsigned int SpecialHandling : 3;
unsigned int WebRejected : 1;
unsigned int WillRemove : 1;
@@ -91,7 +92,9 @@ enum SHValues {
SH_INSERT,
SH_NOSWAP_LD,
SH_NOSWAP_ST,
- SH_SPLAT
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_COPYSCALAR
};
struct PPCVSXSwapRemoval : public MachineFunctionPass {
@@ -167,6 +170,21 @@ private:
isRegInClass(Reg, &PPC::VRRCRegClass));
}
+ // Return true iff the given register is a partial vector register.
+ bool isScalarVecReg(unsigned Reg) {
+ return (isRegInClass(Reg, &PPC::VSFRCRegClass) ||
+ isRegInClass(Reg, &PPC::VSSRCRegClass));
+ }
+
+ // Return true iff the given register mentions all or part of a
+ // vector register. Also sets Partial to true if the mention
+ // is for just the floating-point register overlap of the register.
+ bool isAnyVecReg(unsigned Reg, bool &Partial) {
+ if (isScalarVecReg(Reg))
+ Partial = true;
+ return isScalarVecReg(Reg) || isVecReg(Reg);
+ }
+
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -223,12 +241,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
for (MachineInstr &MI : MBB) {
bool RelevantInstr = false;
+ bool Partial = false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (isVecReg(Reg)) {
+ if (isAnyVecReg(Reg, Partial)) {
RelevantInstr = true;
break;
}
@@ -250,8 +269,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// Unless noted otherwise, an instruction is considered
// safe for the optimization. There are a large number of
// such true-SIMD instructions (all vector math, logical,
- // select, compare, etc.).
- SwapVector[VecIdx].IsSwappable = 1;
+ // select, compare, etc.). However, if the instruction
+ // mentions a partial vector register and does not have
+ // special handling defined, it is not swappable.
+ if (Partial)
+ SwapVector[VecIdx].MentionsPartialVR = 1;
+ else
+ SwapVector[VecIdx].IsSwappable = 1;
break;
case PPC::XXPERMDI: {
// This is a swap if it is of the form XXPERMDI t, s, s, 2.
@@ -269,25 +293,37 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
VecIdx);
if (trueReg1 == trueReg2)
SwapVector[VecIdx].IsSwap = 1;
- }
+ else {
+ // We can still handle these if the two registers are not
+ // identical, by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+ }
// This is a doubleword splat if it is of the form
// XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we
// must look through chains of copy-likes to find the source
// register. We turn off the marking for mention of a physical
// register, because splatting it is safe; the optimization
- // will not swap the value in the physical register.
- else if (immed == 0 || immed == 3) {
+ // will not swap the value in the physical register. Whether
+ // or not the two input registers are identical, we can handle
+ // these by adjusting the form of the XXPERMDI.
+ } else if (immed == 0 || immed == 3) {
+
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+
unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
VecIdx);
unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
VecIdx);
- if (trueReg1 == trueReg2) {
- SwapVector[VecIdx].IsSwappable = 1;
+ if (trueReg1 == trueReg2)
SwapVector[VecIdx].MentionsPhysVR = 0;
- }
+
+ } else {
+ // We can still handle these by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
}
- // Any other form of XXPERMDI is lane-sensitive and unsafe
- // for the optimization.
break;
}
case PPC::LVX:
@@ -324,7 +360,32 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (isVecReg(MI.getOperand(0).getReg()) &&
isVecReg(MI.getOperand(1).getReg()))
SwapVector[VecIdx].IsSwappable = 1;
+ // If we have a copy from one scalar floating-point register
+ // to another, we can accept this even if it is a physical
+ // register. The only way this gets involved is if it feeds
+ // a SUBREG_TO_REG, which is handled by introducing a swap.
+ else if (isScalarVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(1).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::SUBREG_TO_REG: {
+ // These are fine provided they are moving between full vector
+ // register classes. If they are moving from a scalar
+ // floating-point class to a vector class, we can handle those
+ // as well, provided we introduce a swap. It is generally the
+ // case that we will introduce fewer swaps than we remove, but
+ // (FIXME) a cost model could be used. However, introduced
+ // swaps could potentially be CSEd, so this is not trivial.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(2).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ else if (isVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(2).getReg())) {
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
+ }
break;
+ }
case PPC::VSPLTB:
case PPC::VSPLTH:
case PPC::VSPLTW:
@@ -425,6 +486,10 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
case PPC::VUPKLSW:
case PPC::XXMRGHW:
case PPC::XXMRGLW:
+ // XXSLDWI could be replaced by a general permute with one of three
+ // permute control vectors (for shift values 1, 2, 3). However,
+ // VPERM has a more restrictive register class.
+ case PPC::XXSLDWI:
case PPC::XXSPLTW:
break;
}
@@ -501,18 +566,20 @@ void PPCVSXSwapRemoval::formWebs() {
DEBUG(MI->dump());
// It's sufficient to walk vector uses and join them to their unique
- // definitions. In addition, check *all* vector register operands
- // for physical regs.
+ // definitions. In addition, check full vector register operands
+ // for physical regs. We exclude partial-vector register operands
+ // because we can handle them if copied to a full vector.
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!isVecReg(Reg))
+ if (!isVecReg(Reg) && !isScalarVecReg(Reg))
continue;
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- SwapVector[EntryIdx].MentionsPhysVR = 1;
+ if (!(MI->isCopy() && isScalarVecReg(Reg)))
+ SwapVector[EntryIdx].MentionsPhysVR = 1;
continue;
}
@@ -545,15 +612,21 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
- // Reject webs containing mentions of physical registers, or containing
- // operations that we don't know how to handle in a lane-permuted region.
+ // If representative is already rejected, don't waste further time.
+ if (SwapVector[Repr].WebRejected)
+ continue;
+
+ // Reject webs containing mentions of physical or partial registers, or
+ // containing operations that we don't know how to handle in a lane-
+ // permuted region.
if (SwapVector[EntryIdx].MentionsPhysVR ||
+ SwapVector[EntryIdx].MentionsPartialVR ||
!(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
SwapVector[Repr].WebRejected = 1;
DEBUG(dbgs() <<
- format("Web %d rejected for physreg, subreg, or not swap[pable]\n",
+ format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
Repr));
DEBUG(dbgs() << " in " << EntryIdx << ": ");
DEBUG(SwapVector[EntryIdx].VSEMI->dump());
@@ -588,7 +661,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
}
}
- // Reject webs than contain swapping stores that are fed by something
+ // Reject webs that contain swapping stores that are fed by something
// other than a swap instruction.
} else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
@@ -670,7 +743,8 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
// The identified swap entry requires special handling to allow its
// containing computation to be optimized. Perform that handling
// here.
-// FIXME: This code is to be phased in with subsequent patches.
+// FIXME: Additional opportunities will be phased in with subsequent
+// patches.
void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
switch (SwapVector[EntryIdx].SpecialHandling) {
@@ -704,6 +778,91 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
break;
}
+ // For an XXPERMDI that isn't handled otherwise, we need to
+ // reverse the order of the operands. If the selector operand
+ // has a value of 0 or 3, we need to change it to 3 or 0,
+ // respectively. Otherwise we should leave it alone. (This
+ // is equivalent to reversing the two bits of the selector
+ // operand and complementing the result.)
+ case SHValues::SH_XXPERMDI: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing XXPERMDI: ");
+ DEBUG(MI->dump());
+
+ unsigned Selector = MI->getOperand(3).getImm();
+ if (Selector == 0 || Selector == 3)
+ Selector = 3 - Selector;
+ MI->getOperand(3).setImm(Selector);
+
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+ break;
+ }
+
+ // For a copy from a scalar floating-point register to a vector
+ // register, removing swaps will leave the copied value in the
+ // wrong lane. Insert a swap following the copy to fix this.
+ case SHValues::SH_COPYSCALAR: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
+ DEBUG(MI->dump());
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+
+ MI->getOperand(0).setReg(NewVReg);
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+
+ MachineBasicBlock::iterator InsertPoint = MI->getNextNode();
+
+ // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
+ // is copying to a VRRC, we need to be careful to avoid a register
+ // assignment problem. In this case we must copy from VRRC to VSRC
+ // prior to the swap, and from VSRC to VRRC following the swap.
+ // Coalescing will usually remove all this mess.
+
+ if (DstRC == &PPC::VRRCRegClass) {
+ unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), VSRCTmp1)
+ .addReg(NewVReg);
+ DEBUG(MI->getNextNode()->dump());
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), VSRCTmp2)
+ .addReg(VSRCTmp1)
+ .addReg(VSRCTmp1)
+ .addImm(2);
+ DEBUG(MI->getNextNode()->getNextNode()->dump());
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), DstReg)
+ .addReg(VSRCTmp2);
+ DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
+
+ } else {
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), DstReg)
+ .addReg(NewVReg)
+ .addReg(NewVReg)
+ .addImm(2);
+
+ DEBUG(MI->getNextNode()->dump());
+ }
+ break;
+ }
}
}
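
(Aside, not part of the patch: the SH_XXPERMDI selector rewrite above, spelled
out. "Reversing the two bits of the selector operand and complementing the
result" gives:)

    // Illustration only: 2-bit selector under reverse-then-complement.
    //   0 (00) -> reversed 00 -> complemented 11 -> 3
    //   1 (01) -> reversed 10 -> complemented 01 -> 1   (unchanged)
    //   2 (10) -> reversed 01 -> complemented 10 -> 2   (unchanged)
    //   3 (11) -> reversed 11 -> complemented 00 -> 0
    // which is the "swap 0 and 3, leave 1 and 2 alone" rule implemented
    // above as Selector = 3 - Selector for values 0 and 3.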
@@ -756,6 +915,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
DEBUG(dbgs() << "swap ");
if (SwapVector[EntryIdx].MentionsPhysVR)
DEBUG(dbgs() << "physreg ");
+ if (SwapVector[EntryIdx].MentionsPartialVR)
+ DEBUG(dbgs() << "partialreg ");
if (SwapVector[EntryIdx].IsSwappable) {
DEBUG(dbgs() << "swappable ");
@@ -780,6 +941,12 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
case SH_SPLAT:
DEBUG(dbgs() << "special:splat ");
break;
+ case SH_XXPERMDI:
+ DEBUG(dbgs() << "special:xxpermdi ");
+ break;
+ case SH_COPYSCALAR:
+ DEBUG(dbgs() << "special:copyscalar ");
+ break;
}
}
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index c486411f9a1e..5b7bfdd28020 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_target(SparcCodeGen
SparcRegisterInfo.cpp
SparcSubtarget.cpp
SparcTargetMachine.cpp
- SparcSelectionDAGInfo.cpp
SparcMCInstLower.cpp
SparcTargetObjectFile.cpp
)
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 91d2eeef0cc0..9113e4a46b96 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -57,7 +57,7 @@ static MCInstrInfo *createSparcMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createSparcMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitSparcMCRegisterInfo(X, SP::O7);
return X;
@@ -65,11 +65,9 @@ static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
if (CPU.empty())
CPU = (TT.getArch() == Triple::sparcv9) ? "v9" : "v8";
- InitSparcMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createSparcMCSubtargetInfoImpl(TT, CPU, FS);
}
// Code models. Some only make sense for 64-bit code.
@@ -83,7 +81,8 @@ createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
//
// All code models require that the text segment is smaller than 2GB.
-static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createSparcMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
@@ -100,7 +99,8 @@ static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createSparcV9MCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index bccc6bdd53eb..8fa10dcae114 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -190,11 +190,11 @@ static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
{
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
- if (MRI->isPhysRegUsed(reg))
+ if (!MRI->reg_nodbg_empty(reg))
return false;
for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
- if (MRI->isPhysRegUsed(reg))
+ if (!MRI->reg_nodbg_empty(reg))
return false;
return true;
@@ -206,10 +206,10 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- return !(MFI->hasCalls() // has calls
- || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
- || MRI.isPhysRegUsed(SP::O6) // %SP is used
- || hasFP(MF)); // need %FP
+ return !(MFI->hasCalls() // has calls
+ || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
+ || !MRI.reg_nodbg_empty(SP::O6) // %SP is used
+ || hasFP(MF)); // need %FP
}
void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
@@ -218,16 +218,13 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
- if (!MRI.isPhysRegUsed(reg))
+ if (MRI.reg_nodbg_empty(reg))
continue;
unsigned mapped_reg = (reg - SP::I0 + SP::O0);
- assert(!MRI.isPhysRegUsed(mapped_reg));
+ assert(MRI.reg_nodbg_empty(mapped_reg));
// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);
-
- // Mark the reg unused.
- MRI.setPhysRegUnused(reg);
}
// Rewrite MBB's Live-ins.
@@ -247,9 +244,10 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
#endif
}
-void SparcFrameLowering::processFunctionBeforeCalleeSavedScan
- (MachineFunction &MF, RegScavenger *RS) const {
-
+void SparcFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
if (!DisableLeafProc && isLeafProc(MF)) {
SparcMachineFunctionInfo *MFI = MF.getInfo<SparcMachineFunctionInfo>();
MFI->setLeafProc(true);
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index bb3b78861cbd..29fc7b7ba036 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -36,8 +36,8 @@ public:
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool hasFP(const MachineFunction &MF) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = nullptr) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
private:
// Remap input registers to output registers for leaf procedure.
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 9c594a9f0f65..340b72e7940f 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -67,13 +67,16 @@ private:
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
+ return CurDAG->getRegister(GlobalBaseReg,
+ TLI->getPointerTy(CurDAG->getDataLayout()))
+ .getNode();
}
bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout()));
Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
@@ -88,8 +91,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
// Constant offset from frame ref.
- Base =
- CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout()));
} else {
Base = Addr.getOperand(0);
}
@@ -134,7 +137,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
}
R1 = Addr;
- R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy());
+ R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy(CurDAG->getDataLayout()));
return true;
}
@@ -168,10 +171,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
} else {
TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
}
- TopPart = SDValue(CurDAG->getMachineNode(SP::WRASRrr, dl, MVT::i32,
- TopPart,
- CurDAG->getRegister(SP::G0, MVT::i32)), 0);
- TopPart = CurDAG->getCopyToReg(TopPart, dl, SP::Y, TopPart, SDValue()).getValue(1);
+ TopPart = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SP::Y, TopPart,
+ SDValue())
+ .getValue(1);
// FIXME: Handle div by immediate.
unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
@@ -184,12 +186,11 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDValue MulLHS = N->getOperand(0);
SDValue MulRHS = N->getOperand(1);
unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
- SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
- MulLHS, MulRHS);
- // The high part is in the Y register.
- return CurDAG->SelectNodeTo(N, SP::RDASR, MVT::i32,
- CurDAG->getRegister(SP::Y, MVT::i32),
- SDValue(Mul, 1));
+ SDNode *Mul =
+ CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, MulLHS, MulRHS);
+ SDValue ResultHigh = SDValue(Mul, 1);
+ ReplaceUses(SDValue(N, 0), ResultHigh);
+ return nullptr;
}
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 0481676dc1ac..4879d4ee79e5 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -221,10 +221,11 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
unsigned Reg = SFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
- SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, PtrVT);
Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
+ RetOps.push_back(DAG.getRegister(SP::I0, PtrVT));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
@@ -418,6 +419,7 @@ LowerFormalArguments_32(SDValue Chain,
assert(VA.isMemLoc());
unsigned Offset = VA.getLocMemOffset()+StackOffset;
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::f64);
@@ -426,7 +428,7 @@ LowerFormalArguments_32(SDValue Chain,
int FI = MF.getFrameInfo()->CreateFixedObject(8,
Offset,
true);
- SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
MachinePointerInfo(),
false,false, false, 0);
@@ -437,14 +439,14 @@ LowerFormalArguments_32(SDValue Chain,
int FI = MF.getFrameInfo()->CreateFixedObject(4,
Offset,
true);
- SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
MachinePointerInfo(),
false, false, false, 0);
int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
Offset+4,
true);
- SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+ SDValue FIPtr2 = DAG.getFrameIndex(FI2, PtrVT);
SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
MachinePointerInfo(),
@@ -460,7 +462,7 @@ LowerFormalArguments_32(SDValue Chain,
int FI = MF.getFrameInfo()->CreateFixedObject(4,
Offset,
true);
- SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue Load ;
if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
@@ -607,10 +609,10 @@ LowerFormalArguments_64(SDValue Chain,
if (VA.isExtInLoc())
Offset += 8 - ValSize;
int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true);
- InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain,
- DAG.getFrameIndex(FI, getPointerTy()),
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+ InVals.push_back(DAG.getLoad(
+ VA.getValVT(), DL, Chain,
+ DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
+ MachinePointerInfo::getFixedStack(FI), false, false, false, 0));
}
if (!IsVarArg)
@@ -637,10 +639,10 @@ LowerFormalArguments_64(SDValue Chain,
unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true);
- OutChains.push_back(DAG.getStore(Chain, DL, VArg,
- DAG.getFrameIndex(FI, getPointerTy()),
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+ OutChains.push_back(
+ DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
+ MachinePointerInfo::getFixedStack(FI), false, false, 0));
}
if (!OutChains.empty())
@@ -722,7 +724,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
unsigned Align = Flags.getByValAlign();
int FI = MFI->CreateStackObject(Size, Align, false);
- SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32);
Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
@@ -993,7 +995,7 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
Type *ElementTy = Ty->getElementType();
- return getDataLayout()->getTypeAllocSize(ElementTy);
+ return DAG.getDataLayout().getTypeAllocSize(ElementTy);
}
@@ -1057,6 +1059,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SelectionDAG &DAG = CLI.DAG;
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
// Sparc target does not yet support tail call optimization.
CLI.IsTailCall = false;
@@ -1130,13 +1133,11 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
       // Store and reload into the integer register reg and reg+1.
unsigned Offset = 8 * (VA.getLocReg() - SP::I0);
unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128;
- SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
+ SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT);
SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset, DL);
- HiPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr,
- HiPtrOff);
+ HiPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, HiPtrOff);
SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8, DL);
- LoPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr,
- LoPtrOff);
+ LoPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, LoPtrOff);
// Store to %sp+BIAS+128+Offset
SDValue Store = DAG.getStore(Chain, DL, Arg, HiPtrOff,
@@ -1180,13 +1181,13 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
assert(VA.isMemLoc());
// Create a store off the stack pointer for this argument.
- SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
+ SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT);
// The argument area starts at %fp+BIAS+128 in the callee frame,
// %sp+BIAS+128 in ours.
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
Subtarget->getStackPointerBias() +
128, DL);
- PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
MachinePointerInfo(),
false, false, 0));
@@ -1215,10 +1216,9 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
unsigned TF = ((getTargetMachine().getRelocationModel() == Reloc::PIC_)
? SparcMCExpr::VK_Sparc_WPLT30 : 0);
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
- TF);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, TF);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy(), TF);
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, TF);
// Build the operands for the call instruction itself.
SmallVector<SDValue, 8> Ops;
@@ -1370,6 +1370,8 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
const SparcSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
+ auto &DL = *TM.getDataLayout();
+
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
@@ -1394,10 +1396,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
- setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom);
- setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom);
- setOperationAction(ISD::ConstantPool, getPointerTy(), Custom);
- setOperationAction(ISD::BlockAddress, getPointerTy(), Custom);
+ setOperationAction(ISD::GlobalAddress, getPointerTy(DL), Custom);
+ setOperationAction(ISD::GlobalTLSAddress, getPointerTy(DL), Custom);
+ setOperationAction(ISD::ConstantPool, getPointerTy(DL), Custom);
+ setOperationAction(ISD::BlockAddress, getPointerTy(DL), Custom);
// Sparc doesn't have sext_inreg, replace them with shl/sra
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -1704,7 +1706,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
-EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
@@ -1804,7 +1807,7 @@ SDValue SparcTargetLowering::makeHiLoPair(SDValue Op,
// or ExternalSymbol SDNode.
SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT VT = getPointerTy();
+ EVT VT = getPointerTy(DAG.getDataLayout());
// Handle PIC mode first.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
@@ -1871,7 +1874,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
@@ -1983,7 +1986,7 @@ SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args,
if (ArgTy->isFP128Ty()) {
// Create a stack object and pass the pointer to the library function.
int FI = MFI->CreateStackObject(16, 8, false);
- SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain,
DL,
Entry.Node,
@@ -2008,8 +2011,9 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
ArgListTy Args;
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue Callee = DAG.getExternalSymbol(LibFuncName, getPointerTy());
+ SDValue Callee = DAG.getExternalSymbol(LibFuncName, PtrVT);
Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
Type *RetTyABI = RetTy;
SDValue Chain = DAG.getEntryNode();
@@ -2019,7 +2023,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
// Create a Stack Object to receive the return value of type f128.
ArgListEntry Entry;
int RetFI = MFI->CreateStackObject(16, 8, false);
- RetPtr = DAG.getFrameIndex(RetFI, getPointerTy());
+ RetPtr = DAG.getFrameIndex(RetFI, PtrVT);
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
if (!Subtarget->is64Bit())
@@ -2082,7 +2086,8 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break;
}
- SDValue Callee = DAG.getExternalSymbol(LibCall, getPointerTy());
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Callee = DAG.getExternalSymbol(LibCall, PtrVT);
Type *RetTy = Type::getInt32Ty(*DAG.getContext());
ArgListTy Args;
SDValue Chain = DAG.getEntryNode();
@@ -2362,6 +2367,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+ auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// Need frame address to find the address of VarArgsFrameIndex.
MF.getFrameInfo()->setFrameAddressIsTaken(true);
@@ -2370,9 +2376,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
// memory location argument.
SDLoc DL(Op);
SDValue Offset =
- DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(),
- DAG.getRegister(SP::I6, TLI.getPointerTy()),
- DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
+ DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(SP::I6, PtrVT),
+ DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
@@ -2497,8 +2502,8 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
SDValue RetAddr;
if (depth == 0) {
- unsigned RetReg = MF.addLiveIn(SP::I7,
- TLI.getRegClassFor(TLI.getPointerTy()));
+ auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ unsigned RetReg = MF.addLiveIn(SP::I7, TLI.getRegClassFor(PtrVT));
RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
return RetAddr;
}
@@ -3065,7 +3070,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SparcTargetLowering::ConstraintType
-SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+SparcTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
@@ -3139,7 +3144,7 @@ LowerAsmOperandForConstraint(SDValue Op,
std::pair<unsigned, const TargetRegisterClass *>
SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index b6bc3d255713..bbc91a493c9d 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -72,7 +72,7 @@ namespace llvm {
const char *getTargetNodeName(unsigned Opcode) const override;
- ConstraintType getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
@@ -82,14 +82,16 @@ namespace llvm {
SelectionDAG &DAG) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
/// getSetCCResultType - Return the ISD::SETCC ValueType
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
SDValue
LowerFormalArguments(SDValue Chain,
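
The same DataLayout and StringRef signature changes apply to any TargetLowering subclass; a sketch of the affected overrides as they read after this patch (MyTargetLowering is an illustrative name, the bodies mirror the Sparc ones):

  class MyTargetLowering : public TargetLowering {
  public:
    // (constructor taking TargetMachine & omitted)
    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
      return MVT::i32;
    }
    EVT getSetCCResultType(const DataLayout &, LLVMContext &,
                           EVT VT) const override {
      if (!VT.isVector())
        return MVT::i32;
      return VT.changeVectorElementTypeToInteger();
    }
    ConstraintType getConstraintType(StringRef Constraint) const override;
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;
  };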
diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td
index 670e9e989c81..25cc652dbd9e 100644
--- a/lib/Target/Sparc/SparcInstrAliases.td
+++ b/lib/Target/Sparc/SparcInstrAliases.td
@@ -245,6 +245,7 @@ multiclass fp_cond_alias<string cond, int condVal> {
}
defm : int_cond_alias<"a", 0b1000>;
+defm : int_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual
defm : int_cond_alias<"n", 0b0000>;
defm : int_cond_alias<"ne", 0b1001>;
defm : int_cond_alias<"nz", 0b1001>; // same as ne
@@ -266,6 +267,7 @@ defm : int_cond_alias<"vc", 0b1111>;
defm : int_cond_alias<"vs", 0b0111>;
defm : fp_cond_alias<"a", 0b0000>;
+defm : fp_cond_alias<"", 0b0000>; // same as a; gnu asm, not in manual
defm : fp_cond_alias<"n", 0b1000>;
defm : fp_cond_alias<"u", 0b0111>;
defm : fp_cond_alias<"g", 0b0110>;
@@ -284,7 +286,16 @@ defm : fp_cond_alias<"le", 0b1101>;
defm : fp_cond_alias<"ule", 0b1110>;
defm : fp_cond_alias<"o", 0b1111>;
-// Instruction aliases for JMPL.
+// Section A.3 Synthetic Instructions
+
+// Most are marked Emit=0, so they are not used for disassembly. This is purely
+// an aesthetic choice: the policy is to prefer the non-alias form on output,
+// except for the most obvious and clarifying aliases: cmp, jmp, call, tst,
+// ret, retl.
+
+// Note: cmp is handled in SparcInstrInfo.
+// jmp/call/ret/retl have special case handling for output in
+// SparcInstPrinter.cpp
// jmp addr -> jmpl addr, %g0
def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr), 0>;
@@ -294,25 +305,129 @@ def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr), 0>;
def : InstAlias<"call $addr", (JMPLrr O7, MEMrr:$addr), 0>;
def : InstAlias<"call $addr", (JMPLri O7, MEMri:$addr), 0>;
-// retl -> RETL 8
-def : InstAlias<"retl", (RETL 8)>;
+// tst reg -> orcc %g0, reg, %g0
+def : InstAlias<"tst $rs2", (ORCCrr G0, IntRegs:$rs2, G0)>;
-// ret -> RET 8
+// ret -> jmpl %i7+8, %g0 (aka RET 8)
def : InstAlias<"ret", (RET 8)>;
-// mov reg, rd -> or %g0, reg, rd
-def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>;
+// retl -> jmpl %o7+8, %g0 (aka RETL 8)
+def : InstAlias<"retl", (RETL 8)>;
-// mov simm13, rd -> or %g0, simm13, rd
-def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>;
+// restore -> restore %g0, %g0, %g0
+def : InstAlias<"restore", (RESTORErr G0, G0, G0)>;
+
+// save -> save %g0, %g0, %g0
+def : InstAlias<"save", (SAVErr G0, G0, G0)>;
// set value, rd
// (turns into a sequence of sethi+or, depending on the value)
// def : InstAlias<"set $val, $rd", (ORri IntRegs:$rd, (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
def SET : AsmPseudoInst<(outs IntRegs:$rd), (ins i32imm:$val), "set $val, $rd">;
-// restore -> restore %g0, %g0, %g0
-def : InstAlias<"restore", (RESTORErr G0, G0, G0)>;
+// not rd -> xnor rd, %g0, rd
+def : InstAlias<"not $rd", (XNORrr IntRegs:$rd, IntRegs:$rd, G0), 0>;
+
+// not reg, rd -> xnor reg, %g0, rd
+def : InstAlias<"not $rs1, $rd", (XNORrr IntRegs:$rd, IntRegs:$rs1, G0), 0>;
+
+// neg rd -> sub %g0, rd, rd
+def : InstAlias<"neg $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rd), 0>;
+
+// neg reg, rd -> sub %g0, reg, rd
+def : InstAlias<"neg $rs2, $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rs2), 0>;
+
+// inc rd -> add rd, 1, rd
+def : InstAlias<"inc $rd", (ADDri IntRegs:$rd, IntRegs:$rd, 1), 0>;
+
+// inc simm13, rd -> add rd, simm13, rd
+def : InstAlias<"inc $simm13, $rd", (ADDri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+
+// inccc rd -> addcc rd, 1, rd
+def : InstAlias<"inccc $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, 1), 0>;
+
+// inccc simm13, rd -> addcc rd, simm13, rd
+def : InstAlias<"inccc $simm13, $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+
+// dec rd -> sub rd, 1, rd
+def : InstAlias<"dec $rd", (SUBri IntRegs:$rd, IntRegs:$rd, 1), 0>;
+
+// dec simm13, rd -> sub rd, simm13, rd
+def : InstAlias<"dec $simm13, $rd", (SUBri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+
+// deccc rd -> subcc rd, 1, rd
+def : InstAlias<"deccc $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, 1), 0>;
+
+// deccc simm13, rd -> subcc rd, simm13, rd
+def : InstAlias<"deccc $simm13, $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+
+// btst reg_or_imm, reg -> andcc reg,reg_or_imm,%g0
+def : InstAlias<"btst $rs2, $rs1", (ANDCCrr G0, IntRegs:$rs1, IntRegs:$rs2), 0>;
+def : InstAlias<"btst $simm13, $rs1", (ANDCCri G0, IntRegs:$rs1, i32imm:$simm13), 0>;
+
+// bset reg_or_imm, rd -> or rd,reg_or_imm,rd
+def : InstAlias<"bset $rs2, $rd", (ORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>;
+def : InstAlias<"bset $simm13, $rd", (ORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+
+// bclr reg_or_imm, rd -> andn rd,reg_or_imm,rd
+def : InstAlias<"bclr $rs2, $rd", (ANDNrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>;
+def : InstAlias<"bclr $simm13, $rd", (ANDNri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+
+// btog reg_or_imm, rd -> xor rd,reg_or_imm,rd
+def : InstAlias<"btog $rs2, $rd", (XORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>;
+def : InstAlias<"btog $simm13, $rd", (XORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>;
+
+
+// clr rd -> or %g0, %g0, rd
+def : InstAlias<"clr $rd", (ORrr IntRegs:$rd, G0, G0), 0>;
+
+// clr{b,h,} [addr] -> st{b,h,} %g0, [addr]
+def : InstAlias<"clrb [$addr]", (STBrr MEMrr:$addr, G0), 0>;
+def : InstAlias<"clrb [$addr]", (STBri MEMri:$addr, G0), 0>;
+def : InstAlias<"clrh [$addr]", (STHrr MEMrr:$addr, G0), 0>;
+def : InstAlias<"clrh [$addr]", (STHri MEMri:$addr, G0), 0>;
+def : InstAlias<"clr [$addr]", (STrr MEMrr:$addr, G0), 0>;
+def : InstAlias<"clr [$addr]", (STri MEMri:$addr, G0), 0>;
+
+
+// mov reg_or_imm, rd -> or %g0, reg_or_imm, rd
+def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>;
+def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>;
+
+// mov specialreg, rd -> rd specialreg, rd
+def : InstAlias<"mov $asr, $rd", (RDASR IntRegs:$rd, ASRRegs:$asr), 0>;
+def : InstAlias<"mov %psr, $rd", (RDPSR IntRegs:$rd), 0>;
+def : InstAlias<"mov %wim, $rd", (RDWIM IntRegs:$rd), 0>;
+def : InstAlias<"mov %tbr, $rd", (RDTBR IntRegs:$rd), 0>;
+
+// mov reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg
+def : InstAlias<"mov $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>;
+def : InstAlias<"mov $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>;
+def : InstAlias<"mov $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"mov $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"mov $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"mov $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>;
+def : InstAlias<"mov $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"mov $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
+
+// End of Section A.3
+
+// wr reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg
+// (That is: omit the first argument when it is %g0. This is not in the manual,
+// but it is supported by GNU as and Solaris as.)
+def : InstAlias<"wr $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>;
+def : InstAlias<"wr $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>;
+def : InstAlias<"wr $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"wr $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"wr $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>;
+def : InstAlias<"wr $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
+
+
+// flush -> flush %g0
+def : InstAlias<"flush", (FLUSH), 0>;
+
def : MnemonicAlias<"return", "rett">, Requires<[HasV9]>;
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index f87cee43e319..6167c532db80 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -324,6 +324,15 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
numSubRegs = 4;
movOpc = SP::FMOVS;
}
+ } else if (SP::ASRRegsRegClass.contains(DestReg) &&
+ SP::IntRegsRegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(SP::WRASRrr), DestReg)
+ .addReg(SP::G0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (SP::IntRegsRegClass.contains(DestReg) &&
+ SP::ASRRegsRegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(SP::RDASR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
} else
llvm_unreachable("Impossible reg-to-reg copy");
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index a02bae07a336..3b9e048ea8b3 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -536,6 +536,7 @@ let Defs = [ICC] in
let Uses = [ICC] in
defm SUBC : F3_12np <"subx", 0b001100>;
+// cmp (from Section A.3) is a specialized alias for subcc
let Defs = [ICC], rd = 0 in {
def CMPrr : F3_1<2, 0b010100,
(outs), (ins IntRegs:$rs1, IntRegs:$rs2),
@@ -559,12 +560,12 @@ let Defs = [Y, ICC] in {
}
// Section B.19 - Divide Instructions, p. 115
-let Defs = [Y] in {
+let Uses = [Y], Defs = [Y] in {
defm UDIV : F3_12np<"udiv", 0b001110>;
defm SDIV : F3_12np<"sdiv", 0b001111>;
}
-let Defs = [Y, ICC] in {
+let Uses = [Y], Defs = [Y, ICC] in {
defm UDIVCC : F3_12np<"udivcc", 0b011110>;
defm SDIVCC : F3_12np<"sdivcc", 0b011111>;
}
@@ -828,6 +829,20 @@ let rd = 0 in
def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22),
"unimp $imm22", []>;
+// Section B.32 - Flush Instruction Memory
+let rd = 0 in {
+ def FLUSHrr : F3_1<2, 0b111011, (outs), (ins MEMrr:$addr),
+ "flush $addr", []>;
+ def FLUSHri : F3_2<2, 0b111011, (outs), (ins MEMri:$addr),
+ "flush $addr", []>;
+
+ // The no-arg FLUSH exists only for the benefit of the InstAlias "flush",
+ // which cannot use FLUSHrr because a MEMrr operand with fixed G0 registers
+ // cannot be constructed.
+ let rs1 = 0, rs2 = 0 in
+ def FLUSH : F3_1<2, 0b111011, (outs), (ins), "flush %g0", []>;
+}
+
// Section B.33 - Floating-point Operate (FPop) Instructions
// Convert Integer to Floating-point Instructions, p. 141
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index e504da4d3b21..db8a7e86962d 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -249,4 +249,6 @@ def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>;
// Ancillary state registers
def ASRRegs : RegisterClass<"SP", [i32], 32,
- (add Y, (sequence "ASR%u", 1, 31))>;
+ (add Y, (sequence "ASR%u", 1, 31))> {
+ let isAllocatable = 0;
+}
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
deleted file mode 100644
index a308fc5e739e..000000000000
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SparcSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SparcSelectionDAGInfo.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "sparc-selectiondag-info"
-
-SparcSelectionDAGInfo::SparcSelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {
-}
-
-SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
-}
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
deleted file mode 100644
index 6818291b30b4..000000000000
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Sparc subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H
-#define LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class SparcTargetMachine;
-
-class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit SparcSelectionDAGInfo(const DataLayout &DL);
- ~SparcSelectionDAGInfo() override;
-};
-
-}
-
-#endif
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index 479b25d2723f..d69da409e428 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -54,7 +54,7 @@ SparcSubtarget::SparcSubtarget(const Triple &TT, const std::string &CPU,
bool is64Bit)
: SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(*TM.getDataLayout()), FrameLowering(*this) {}
+ FrameLowering(*this) {}
int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 983b1193975d..9d21911d88f0 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -17,9 +17,9 @@
#include "SparcFrameLowering.h"
#include "SparcInstrInfo.h"
#include "SparcISelLowering.h"
-#include "SparcSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -39,7 +39,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool UsePopc;
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
- SparcSelectionDAGInfo TSInfo;
+ TargetSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
public:
@@ -56,7 +56,7 @@ public:
const SparcTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
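
Since TargetSelectionDAGInfo no longer takes a DataLayout at construction, a target with no custom memcpy/memset lowering can drop its SelectionDAGInfo subclass and hold the base class by value, as Sparc does here. A sketch with illustrative MyTarget names:

  #include "llvm/Target/TargetSelectionDAGInfo.h"

  class MySubtarget : public MyGenSubtargetInfo {
    TargetSelectionDAGInfo TSInfo;   // default-constructed; no DataLayout argument
  public:
    const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
      return &TSInfo;
    }
  };

SystemZ keeps its subclass because it overrides EmitTargetCodeForMemcpy and friends, so it only loses the DataLayout constructor argument (see the SystemZSelectionDAGInfo hunks further down).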
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 81882106fc46..5fefa315a4cf 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -148,7 +148,7 @@ static MCInstrInfo *createSystemZMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createSystemZMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitSystemZMCRegisterInfo(X, SystemZ::R14D);
return X;
@@ -156,12 +156,11 @@ static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createSystemZMCSubtargetInfoImpl(TT, CPU, FS);
}
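
createSystemZMCSubtargetInfoImpl is the factory that tablegen emits under GET_SUBTARGETINFO_MC_DESC, so the hand-written new MCSubtargetInfo plus Init call goes away. For another target the registration keeps the same shape (MyTarget names are illustrative):

  static MCSubtargetInfo *
  createMyTargetMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
    // Generated into MyTargetGenSubtargetInfo.inc by tablegen.
    return createMyTargetMCSubtargetInfoImpl(TT, CPU, FS);
  }

  // Registered as before, e.g. in LLVMInitializeMyTargetTargetMC():
  //   TargetRegistry::RegisterMCSubtargetInfo(TheMyTargetTarget,
  //                                           createMyTargetMCSubtargetInfo);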
-static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index a636b35635ce..397de472a6ee 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -61,11 +61,12 @@ SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
return SpillOffsetTable;
}
-void SystemZFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
MachineFrameInfo *MFFrame = MF.getFrameInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
bool HasFP = hasFP(MF);
SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
@@ -77,17 +78,17 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// argument register R6D.
if (IsVarArg)
for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
- MRI.setPhysRegUsed(SystemZ::ArgGPRs[I]);
+ SavedRegs.set(SystemZ::ArgGPRs[I]);
// If the function requires a frame pointer, record that the hard
// frame pointer will be clobbered.
if (HasFP)
- MRI.setPhysRegUsed(SystemZ::R11D);
+ SavedRegs.set(SystemZ::R11D);
// If the function calls other functions, record that the return
// address register will be clobbered.
if (MFFrame->hasCalls())
- MRI.setPhysRegUsed(SystemZ::R14D);
+ SavedRegs.set(SystemZ::R14D);
// If we are saving GPRs other than the stack pointer, we might as well
// save and restore the stack pointer at the same time, via STMG and LMG.
@@ -96,8 +97,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
for (unsigned I = 0; CSRegs[I]; ++I) {
unsigned Reg = CSRegs[I];
- if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) {
- MRI.setPhysRegUsed(SystemZ::R15D);
+ if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) {
+ SavedRegs.set(SystemZ::R15D);
break;
}
}
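
The old processFunctionBeforeCalleeSavedScan hook marked registers through MachineRegisterInfo::setPhysRegUsed; determineCalleeSaves instead receives a BitVector to populate, after delegating to the base class as its first statement. A condensed sketch of the migration (MyTargetFrameLowering and MyTarget::FP are illustrative names):

  void MyTargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                   BitVector &SavedRegs,
                                                   RegScavenger *RS) const {
    // Let the generic code seed SavedRegs from actual register usage first.
    TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
    // Request additional saves by setting bits instead of marking
    // physical registers as used on the MachineRegisterInfo.
    if (hasFP(MF))
      SavedRegs.set(MyTarget::FP);
  }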
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 60bad894ee44..5ade757f17f7 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -27,8 +27,8 @@ public:
bool isFPCloseToIncomingSP() const override { return false; }
const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const
override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 372f6fb3ea50..056ee02dcc21 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,10 +81,11 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
return Op;
}
-SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI)
- : TargetLowering(tm), Subtarget(STI) {
- MVT PtrVT = getPointerTy();
+ : TargetLowering(TM), Subtarget(STI) {
+ auto &DL = *TM.getDataLayout();
+ MVT PtrVT = getPointerTy(DL);
// Set up the register classes.
if (Subtarget.hasHighWord())
@@ -455,7 +456,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
MaxStoresPerMemsetOptSize = 0;
}
-EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
+ LLVMContext &, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
@@ -507,8 +509,8 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
-bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Punt on globals for now, although they can be used in limited
// RELATIVE LONG cases.
@@ -544,7 +546,7 @@ bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
-SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'a': // Address register
@@ -641,13 +643,14 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
// has already been verified. MC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
-parseRegisterNumber(const std::string &Constraint,
- const TargetRegisterClass *RC, const unsigned *Map) {
+parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
+ const unsigned *Map) {
assert(*(Constraint.end()-1) == '}' && "Missing '}'");
if (isdigit(Constraint[2])) {
- std::string Suffix(Constraint.data() + 2, Constraint.size() - 2);
- unsigned Index = atoi(Suffix.c_str());
- if (Index < 16 && Map[Index])
+ unsigned Index;
+ bool Failed =
+ Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
+ if (!Failed && Index < 16 && Map[Index])
return std::make_pair(Map[Index], RC);
}
return std::make_pair(0U, nullptr);
@@ -655,8 +658,7 @@ parseRegisterNumber(const std::string &Constraint,
std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, const std::string &Constraint,
- MVT VT) const {
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
@@ -687,7 +689,7 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &SystemZ::FP32BitRegClass);
}
}
- if (Constraint[0] == '{') {
+ if (Constraint.size() > 0 && Constraint[0] == '{') {
// We need to override the default register parsing for GPRs and FPRs
// because the interpretation depends on VT. The internal names of
// the registers are also different from the external names
@@ -931,7 +933,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
// passed as right-justified 8-byte values.
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
@@ -969,7 +971,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
&SystemZ::FP64BitRegClass);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
@@ -1019,7 +1021,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(MF.getDataLayout());
// Detect unsupported vector argument and return types.
if (Subtarget.hasVector()) {
@@ -2401,7 +2403,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
int64_t Offset = Node->getOffset();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
Reloc::Model RM = DAG.getTarget().getRelocationModel();
CodeModel::Model CM = DAG.getTarget().getCodeModel();
@@ -2440,7 +2442,7 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
unsigned Opcode,
SDValue GOTOffset) const {
SDLoc DL(Node);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDValue Glue;
@@ -2486,7 +2488,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const {
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
// The high part of the thread pointer is in access register 0.
@@ -2587,7 +2589,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
SDLoc DL(Node);
const BlockAddress *BA = Node->getBlockAddress();
int64_t Offset = Node->getOffset();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -2597,7 +2599,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
SelectionDAG &DAG) const {
SDLoc DL(JT);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
// Use LARL to load the address of the table.
@@ -2607,7 +2609,7 @@ SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SelectionDAG &DAG) const {
SDLoc DL(CP);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (CP->isMachineConstantPoolEntry())
@@ -2671,7 +2673,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 2f7617bbdac3..949b67f114ea 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -339,10 +339,10 @@ public:
const SystemZSubtarget &STI);
// Override TargetLowering.
- MVT getScalarShiftAmountTy(EVT LHSTy) const override {
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
}
- MVT getVectorIdxTy() const override {
+ MVT getVectorIdxTy(const DataLayout &DL) const override {
// Only the lower 12 bits of an element index are used, so we don't
// want to clobber the upper 32 bits of a GPR unnecessarily.
return MVT::i32;
@@ -364,12 +364,13 @@ public:
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
- EVT getSetCCResultType(LLVMContext &, EVT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
@@ -379,10 +380,9 @@ public:
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
TargetLowering::ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ getConstraintType(StringRef Constraint) const override;
TargetLowering::ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
@@ -391,8 +391,7 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode.size() == 1) {
switch(ConstraintCode[0]) {
default:
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 7cabea962e91..dc7bd25d7ed5 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -36,7 +36,7 @@ SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
BitVector
SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const SystemZFrameLowering *TFI = getFrameLowering(MF);
if (TFI->hasFP(MF)) {
// R11D is the frame pointer. Reserve all aliases.
@@ -64,7 +64,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineFunction &MF = *MBB.getParent();
auto *TII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const SystemZFrameLowering *TFI = getFrameLowering(MF);
DebugLoc DL = MI->getDebugLoc();
// Decompose the frame index into a base and offset.
@@ -135,6 +135,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const SystemZFrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D;
}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index e7e0268dbb8a..178aa3817311 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -18,12 +18,6 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
-}
-
// Decide whether it is best to use a loop or straight-line code for
// a block operation of Size bytes with source address Src and destination
// address Dest. Sequence is the opcode to use for straight-line code
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index a257d6b55494..246fa3e5e656 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -22,8 +22,7 @@ class SystemZTargetMachine;
class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit SystemZSelectionDAGInfo(const DataLayout &DL);
- ~SystemZSelectionDAGInfo();
+ explicit SystemZSelectionDAGInfo() = default;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dst, SDValue Src,
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index eb5e5c0b9ff8..0b49fcdd8f78 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -42,7 +42,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasTransactionalExecution(false), HasProcessorAssist(false),
HasVector(false), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(*TM.getDataLayout()), FrameLowering() {}
+ TSInfo(), FrameLowering() {}
// Return true if GV binds locally under reloc model RM.
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index e9cabe968eea..4b80973ed879 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -29,7 +29,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
public:
explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
SystemZTTIImpl(const SystemZTTIImpl &Arg)
@@ -37,18 +38,6 @@ public:
SystemZTTIImpl(SystemZTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 0b05303f71bf..83174c20c8e9 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -150,8 +150,9 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const {
}
TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &) { return TargetTransformInfo(getDataLayout()); });
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(F.getParent()->getDataLayout());
+ });
}
static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
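
With the DataLayout taken from the Function's parent Module, a target overriding getTargetIRAnalysis follows the same per-Function shape; this is also where constructors like the SystemZTTIImpl one above (taking Function &F) get used. MyTargetMachine and MyTTIImpl are illustrative names:

  TargetIRAnalysis MyTargetMachine::getTargetIRAnalysis() {
    return TargetIRAnalysis([this](Function &F) {
      // The TTI implementation is built per function and carries the
      // DataLayout of F's parent module.
      return TargetTransformInfo(MyTTIImpl(this, F));
    });
  }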
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 87df7af84525..6a61fcdf0f86 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -19,7 +19,14 @@ using namespace llvm;
//---------------------------------------------------------------------------
// TargetSubtargetInfo Class
//
-TargetSubtargetInfo::TargetSubtargetInfo() {}
+TargetSubtargetInfo::TargetSubtargetInfo(
+ const Triple &TT, StringRef CPU, StringRef FS,
+ ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
+ const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+ const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
+ const InstrStage *IS, const unsigned *OC, const unsigned *FP)
+ : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
+}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt
index df04c2a3460b..25de9eee0831 100644
--- a/lib/Target/WebAssembly/CMakeLists.txt
+++ b/lib/Target/WebAssembly/CMakeLists.txt
@@ -1,6 +1,7 @@
set(LLVM_TARGET_DEFINITIONS WebAssembly.td)
tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter)
+tablegen(LLVM WebAssemblyGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(WebAssemblyCommonTableGen)
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index d248556c62d7..224aa773a80e 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -29,6 +29,9 @@ using namespace llvm;
#define GET_SUBTARGETINFO_MC_DESC
#include "WebAssemblyGenSubtargetInfo.inc"
+#define GET_REGINFO_MC_DESC
+#include "WebAssemblyGenRegisterInfo.inc"
+
static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT);
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 24893daec7ea..eebf5b72f62b 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -47,6 +47,9 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
// Defines symbolic names for WebAssembly registers. This defines a mapping from
// register name to register number.
//
+#define GET_REGINFO_ENUM
+#include "WebAssemblyGenRegisterInfo.inc"
+
#define GET_SUBTARGETINFO_ENUM
#include "WebAssemblyGenSubtargetInfo.inc"
diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile
index 35d835c6506c..f102d73f6e86 100644
--- a/lib/Target/WebAssembly/Makefile
+++ b/lib/Target/WebAssembly/Makefile
@@ -12,7 +12,8 @@ LIBRARYNAME = LLVMWebAssemblyCodeGen
TARGET = WebAssembly
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = WebAssemblyGenSubtargetInfo.inc WebAssemblyGenMCCodeEmitter.inc
+BUILT_SOURCES = WebAssemblyGenRegisterInfo.inc WebAssemblyGenSubtargetInfo.inc \
+ WebAssemblyGenMCCodeEmitter.inc
DIRS = InstPrinter TargetInfo MCTargetDesc
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
index 7a71060a638f..63e02c455895 100644
--- a/lib/Target/WebAssembly/README.txt
+++ b/lib/Target/WebAssembly/README.txt
@@ -12,4 +12,15 @@ binary encoding of WebAssembly itself:
* https://github.com/WebAssembly/design/blob/master/AstSemantics.md
* https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+Interesting work that remains to be done:
+* Write a pass to restructurize irreducible control flow. This needs to be done
+ before register allocation to be efficient, because it may duplicate basic
+ blocks and WebAssembly performs register allocation at a whole-function
+ level. Note that LLVM's GPU code has such a pass, but it linearizes control
+ flow (e.g. both sides of branches execute and are masked), which is undesirable
+ for WebAssembly.
+* Basic relooper to expose control flow as an AST.
+* Figure out how to properly use MC for virtual ISAs. This may require some
+ refactoring of MC.
+
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4eec02efbd94..4184eb6dc5a6 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -38,6 +38,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// WebAssembly does not produce floating-point exceptions on normal floating
// point operations.
setHasFloatingPointExceptions(false);
+ // We don't know the microarchitecture here, so just reduce register pressure.
+ setSchedulingPreference(Sched::RegPressure);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 35e88eec8573..64415658ed81 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -6,9 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// WebAssembly Atomic operand code-gen constructs.
-//
+///
+/// \file
+/// \brief WebAssembly Atomic operand code-gen constructs.
+///
//===----------------------------------------------------------------------===//
// TODO: Implement atomic instructions.
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
new file mode 100644
index 000000000000..6b5b6cd54173
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -0,0 +1,21 @@
+//===- WebAssemblyInstrCall.td-WebAssembly Call codegen support -*- tablegen -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly Call operand code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+/*
+ * TODO(jfb): Add the following.
+ *
+ * call_direct: call function directly
+ * call_indirect: call function indirectly
+ * addressof: obtain a function pointer value for a given function
+ */
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
new file mode 100644
index 000000000000..3fa29061b1de
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -0,0 +1,44 @@
+//===-- WebAssemblyInstrConv.td-WebAssembly Conversion support -*- tablegen -*-=
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly datatype conversions, truncations, reinterpretations,
+/// promotions, and demotions operand code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+/*
+ * TODO(jfb): Add the following.
+ *
+ * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer
+ * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer
+ * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer
+ * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer
+ * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer
+ * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer
+ * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer
+ * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer
+ * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer
+ * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer
+ * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer
+ * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer
+ * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer
+ * float32.demote[float64]: demote a 64-bit float to a 32-bit float
+ * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float
+ * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float
+ * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float
+ * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float
+ * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float
+ * float64.promote[float32]: promote a 32-bit float to a 64-bit float
+ * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float
+ * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float
+ * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float
+ * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float
+ * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float
+ */
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
new file mode 100644
index 000000000000..30ef6339d65a
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -0,0 +1,44 @@
+// WebAssemblyInstrFloat.td-WebAssembly Float codegen support ---*- tablegen -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly Floating-point operand code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+defm FADD : BinaryFP<fadd>;
+defm FSUB : BinaryFP<fsub>;
+defm FMUL : BinaryFP<fmul>;
+defm FDIV : BinaryFP<fdiv>;
+defm FABS : UnaryFP<fabs>;
+defm FNEG : UnaryFP<fneg>;
+defm COPYSIGN : BinaryFP<fcopysign>;
+defm CEIL : UnaryFP<fceil>;
+defm FLOOR : UnaryFP<ffloor>;
+defm TRUNC : UnaryFP<ftrunc>;
+defm NEARESTINT : UnaryFP<fnearbyint>;
+
+/*
+ * TODO(jfb): Add the following for 32-bit and 64-bit.
+ *
+ * float32.eq: compare equal
+ * float32.lt: less than
+ * float32.le: less than or equal
+ * float32.gt: greater than
+ * float32.ge: greater than or equal
+ */
+
+defm SQRT : UnaryFP<fsqrt>;
+
+/*
+ * TODO(jfb): Add the following for 32-bit and 64-bit.
+ *
+ * float32.min: minimum (binary operator); if either operand is NaN, returns NaN
+ * float32.max: maximum (binary operator); if either operand is NaN, returns NaN
+ */
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 8bbf3e9ec87b..513c36fa2ec2 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -6,9 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// WebAssembly instruction format definitions.
-//
+///
+/// \file
+/// \brief WebAssembly instruction format definitions.
+///
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format
@@ -26,3 +27,29 @@ class I<dag oops, dag iops, list<dag> pattern, string cstr = "">
dag InOperandList = iops;
let Pattern = pattern;
}
+
+// Unary and binary instructions, for the local types that WebAssembly supports.
+multiclass UnaryInt<SDNode node> {
+ def _I32 : I<(outs Int32:$dst), (ins Int32:$src),
+ [(set Int32:$dst, (node Int32:$src))]>;
+ def _I64 : I<(outs Int64:$dst), (ins Int64:$src),
+ [(set Int64:$dst, (node Int64:$src))]>;
+}
+multiclass BinaryInt<SDNode node> {
+ def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs),
+ [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>;
+ def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs),
+ [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>;
+}
+multiclass UnaryFP<SDNode node> {
+ def _F32 : I<(outs Float32:$dst), (ins Float32:$src),
+ [(set Float32:$dst, (node Float32:$src))]>;
+ def _F64 : I<(outs Float64:$dst), (ins Float64:$src),
+ [(set Float64:$dst, (node Float64:$src))]>;
+}
+multiclass BinaryFP<SDNode node> {
+ def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs),
+ [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>;
+ def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs),
+ [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 142eccfbcaa5..fe3ca76dc08a 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -6,9 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// WebAssembly Instruction definitions.
-//
+///
+/// \file
+/// \brief WebAssembly Instruction definitions.
+///
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -32,6 +33,13 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
+/*
+ * TODO(jfb): Add the following.
+ *
+ * get_local: read the current value of a local variable
+ * set_local: set the current value of a local variable
+*/
+
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
//===----------------------------------------------------------------------===//
@@ -42,5 +50,10 @@ include "WebAssemblyInstrFormats.td"
// Additional sets of instructions.
//===----------------------------------------------------------------------===//
+include "WebAssemblyInstrMemory.td"
+include "WebAssemblyInstrCall.td"
+include "WebAssemblyInstrInteger.td"
+include "WebAssemblyInstrFloat.td"
+include "WebAssemblyInstrConv.td"
include "WebAssemblyInstrAtomics.td"
include "WebAssemblyInstrSIMD.td"
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
new file mode 100644
index 000000000000..5f60fe81b1a2
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -0,0 +1,45 @@
+// WebAssemblyInstrInteger.td-WebAssembly Integer codegen -------*- tablegen -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly Integer operand code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+defm ADD : BinaryInt<add>;
+defm SUB : BinaryInt<sub>;
+defm MUL : BinaryInt<mul>;
+defm SDIV : BinaryInt<sdiv>;
+defm UDIV : BinaryInt<udiv>;
+defm SREM : BinaryInt<srem>;
+defm UREM : BinaryInt<urem>;
+defm AND : BinaryInt<and>;
+defm IOR : BinaryInt<or>;
+defm XOR : BinaryInt<xor>;
+defm SHL : BinaryInt<shl>;
+defm SHR : BinaryInt<srl>;
+defm SAR : BinaryInt<sra>;
+
+/*
+ * TODO(jfb): Add the following for 32-bit and 64-bit.
+ *
+ * int32.eq: sign-agnostic compare equal
+ * int32.slt: signed less than
+ * int32.sle: signed less than or equal
+ * int32.ult: unsigned less than
+ * int32.ule: unsigned less than or equal
+ * int32.sgt: signed greater than
+ * int32.sge: signed greater than or equal
+ * int32.ugt: unsigned greater than
+ * int32.uge: unsigned greater than or equal
+ */
+
+defm CLZ : UnaryInt<ctlz>;
+defm CTZ : UnaryInt<cttz>;
+defm POPCNT : UnaryInt<ctpop>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
new file mode 100644
index 000000000000..5ab40e826caa
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -0,0 +1,46 @@
+// WebAssemblyInstrMemory.td-WebAssembly Memory codegen support -*- tablegen -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly Memory operand code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+/*
+ * TODO(jfb): Add the following.
+ * Each has optional alignment and immediate byte offset.
+ *
+ * int32.load_sx[int8]: sign-extend to int32
+ * int32.load_sx[int16]: sign-extend to int32
+ * int32.load_zx[int8]: zero-extend to int32
+ * int32.load_zx[int16]: zero-extend to int32
+ * int32.load[int32]: (no conversion)
+ * int64.load_sx[int8]: sign-extend to int64
+ * int64.load_sx[int16]: sign-extend to int64
+ * int64.load_sx[int32]: sign-extend to int64
+ * int64.load_zx[int8]: zero-extend to int64
+ * int64.load_zx[int16]: zero-extend to int64
+ * int64.load_zx[int32]: zero-extend to int64
+ * int64.load[int64]: (no conversion)
+ * float32.load[float32]: (no conversion)
+ * float64.load[float64]: (no conversion)
+ *
+ * int32.store[int8]: wrap int32 to int8
+ * int32.store[int16]: wrap int32 to int16
+ * int32.store[int32]: (no conversion)
+ * int64.store[int8]: wrap int64 to int8
+ * int64.store[int16]: wrap int64 to int16
+ * int64.store[int32]: wrap int64 to int32
+ * int64.store[int64]: (no conversion)
+ * float32.store[float32]: (no conversion)
+ * float64.store[float64]: (no conversion)
+ *
+ * load_global: load the value of a given global variable
+ * store_global: store a given value to a given global variable
+ */
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index e25483ad3f7a..3e29906219d2 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -6,9 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// WebAssembly SIMD operand code-gen constructs.
-//
+///
+/// \file
+/// \brief WebAssembly SIMD operand code-gen constructs.
+///
//===----------------------------------------------------------------------===//
// TODO: Implement SIMD instructions.
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index ad24c90af6a2..385c40bf6693 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -30,4 +30,58 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-reg-info"
-WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) : TT(TT) {}
+#define GET_REGINFO_TARGET_DESC
+#include "WebAssemblyGenRegisterInfo.inc"
+
+WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT)
+ : WebAssemblyGenRegisterInfo(0), TT(TT) {}
+
+const MCPhysReg *
+WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
+ static const MCPhysReg CalleeSavedRegs[] = {0};
+ return CalleeSavedRegs;
+}
+
+BitVector
+WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32,
+ WebAssembly::FP64})
+ Reserved.set(Reg);
+ return Reserved;
+}
+
+void WebAssemblyRegisterInfo::eliminateFrameIndex(
+ MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME
+}
+
+unsigned
+WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ static const unsigned Regs[2][2] = {
+ /* !isArch64Bit isArch64Bit */
+ /* !hasFP */ {WebAssembly::SP32, WebAssembly::SP64},
+ /* hasFP */ {WebAssembly::FP32, WebAssembly::FP64}};
+ const WebAssemblyFrameLowering *TFI = getFrameLowering(MF);
+ return Regs[TFI->hasFP(MF)][TT.isArch64Bit()];
+}
+
+bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ return !MF.getFunction()->hasFnAttribute("no-realign-stack");
+}
+
+// FIXME: share this with other backends with identical implementation?
+bool WebAssemblyRegisterInfo::needsStackRealignment(
+ const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const WebAssemblyFrameLowering *TFI = getFrameLowering(MF);
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TFI->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index 55300287a51e..dbdb9d0457af 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -16,6 +16,9 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
+#define GET_REGINFO_HEADER
+#include "WebAssemblyGenRegisterInfo.inc"
+
namespace llvm {
class MachineFunction;
@@ -23,11 +26,25 @@ class RegScavenger;
class TargetRegisterClass;
class Triple;
-class WebAssemblyRegisterInfo final {
+class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo {
const Triple &TT;
public:
explicit WebAssemblyRegisterInfo(const Triple &TT);
+
+ // Code Generation virtual methods.
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+
+ // Base pointer (stack realignment) support.
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 7b3d636a2605..2ba42eb94a40 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -6,10 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file describes the WebAssembly register classes and some nominal
-// physical registers.
-//
+///
+/// \file
+/// \brief This file describes the WebAssembly register classes and some nominal
+/// physical registers.
+///
//===----------------------------------------------------------------------===//
class WebAssemblyReg<string n> : Register<n> {
@@ -23,6 +24,31 @@ class WebAssemblyRegClass<list<ValueType> regTypes, int alignment, dag regList>
// Registers
//===----------------------------------------------------------------------===//
+// Special registers used as the frame and stack pointer.
+//
+// WebAssembly may someday support mixed 32-bit and 64-bit heaps in the same
+// application, which would require separate-width FP and SP.
+def FP32 : WebAssemblyReg<"%FP32">;
+def FP64 : WebAssemblyReg<"%FP64">;
+def SP32 : WebAssemblyReg<"%SP32">;
+def SP64 : WebAssemblyReg<"%SP64">;
+
+// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do
+// away with it? Try deleting once the backend works.
+// WebAssembly uses virtual registers, but the backend defines a few physical
+// registers here to keep SDAG and the MachineInstr layers happy.
+foreach i = 0-4 in {
+ def I#i : WebAssemblyReg<"%i."#i>; // i32
+ def L#i : WebAssemblyReg<"%l."#i>; // i64
+ def F#i : WebAssemblyReg<"%f."#i>; // f32
+ def D#i : WebAssemblyReg<"%d."#i>; // f64
+}
+
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
+
+def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>;
+def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>;
+def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
+def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>;
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
index cfd1bafff236..fae9c6100510 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -17,7 +17,4 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-selectiondag-info"
-WebAssemblySelectionDAGInfo::WebAssemblySelectionDAGInfo(const DataLayout *DL)
- : TargetSelectionDAGInfo(DL) {}
-
WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {}
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
index 03e8d393558d..13d96671276d 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -22,7 +22,6 @@ namespace llvm {
class WebAssemblySelectionDAGInfo final : public TargetSelectionDAGInfo {
public:
- explicit WebAssemblySelectionDAGInfo(const DataLayout *DL);
~WebAssemblySelectionDAGInfo() override;
};
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index addea8e3cc36..3d9e7aacbfbf 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -42,7 +42,7 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
const TargetMachine &TM)
: WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false),
CPUString(CPU), TargetTriple(TT), FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(FS)),
- TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
+ InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
+ TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableMachineScheduler() const { return true; }
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 08bd88c06985..7ffb6047b963 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -31,7 +31,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
typedef TargetTransformInfo TTI;
friend BaseT;
- const WebAssemblyTargetMachine *TM;
const WebAssemblySubtarget *ST;
const WebAssemblyTargetLowering *TLI;
@@ -40,30 +39,15 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
public:
WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F)
- : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)),
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST),
- TLI(Arg.TLI) {}
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)),
- ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {}
- WebAssemblyTTIImpl &operator=(const WebAssemblyTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- TM = RHS.TM;
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- WebAssemblyTTIImpl &operator=(WebAssemblyTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- TM = std::move(RHS.TM);
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 3cad9fa1e2ae..91b144a44824 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -878,6 +878,29 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::EXTRQI:
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isImm())
+ DecodeEXTRQIMask(MI->getOperand(2).getImm(),
+ MI->getOperand(3).getImm(),
+ ShuffleMask);
+
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+
+ case X86::INSERTQI:
+ if (MI->getOperand(3).isImm() &&
+ MI->getOperand(4).isImm())
+ DecodeINSERTQIMask(MI->getOperand(3).getImm(),
+ MI->getOperand(4).getImm(),
+ ShuffleMask);
+
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
case X86::PMOVZXBWrr:
case X86::PMOVZXBDrr:
case X86::PMOVZXBQrr:
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 3e0dc1424609..629802f5dc5e 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -220,7 +220,6 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
case X86::PUSH32i8: return X86::PUSHi32;
case X86::PUSH16i8: return X86::PUSHi16;
case X86::PUSH64i8: return X86::PUSH64i32;
- case X86::PUSH64i16: return X86::PUSH64i32;
}
}
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 89f394582631..ddb764facdbf 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -34,14 +34,16 @@ public:
report_fatal_error(EC.message());
StringRef SymName = *SymNameOrErr;
- uint64_t SymAddr; SymI->getAddress(SymAddr);
+ ErrorOr<uint64_t> SymAddr = SymI->getAddress();
+ if (std::error_code EC = SymAddr.getError())
+ report_fatal_error(EC.message());
uint64_t SymSize = SymI->getSize();
int64_t Addend = *ELFRelocationRef(Rel).getAddend();
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
if (!Sym->isVariable())
- Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(*SymAddr, Ctx));
const MCExpr *Expr = nullptr;
// If hasAddend is true, then we need to add Addend (r_addend) to Expr.
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 431010d4cbc2..83b4091d7665 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -88,9 +88,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
if (CPUName.empty())
CPUName = "generic";
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
- return X;
+ return createX86MCSubtargetInfoImpl(TT, CPUName, ArchFS);
}
static MCInstrInfo *createX86MCInstrInfo() {
@@ -99,17 +97,14 @@ static MCInstrInfo *createX86MCInstrInfo() {
return X;
}
-static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
- Triple TheTriple(TT);
- unsigned RA = (TheTriple.getArch() == Triple::x86_64)
- ? X86::RIP // Should have dwarf #16.
- : X86::EIP; // Should have dwarf #8.
+static MCRegisterInfo *createX86MCRegisterInfo(const Triple &TT) {
+ unsigned RA = (TT.getArch() == Triple::x86_64)
+ ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
MCRegisterInfo *X = new MCRegisterInfo();
- InitX86MCRegisterInfo(X, RA,
- X86_MC::getDwarfRegFlavour(TheTriple, false),
- X86_MC::getDwarfRegFlavour(TheTriple, true),
- RA);
+ InitX86MCRegisterInfo(X, RA, X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true), RA);
X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
}
@@ -156,24 +151,23 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createX86MCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- Triple T(TT);
- bool is64Bit = T.getArch() == Triple::x86_64;
+ bool is64Bit = TT.getArch() == Triple::x86_64;
if (RM == Reloc::Default) {
// Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
// Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
// use static relocation model by default.
- if (T.isOSDarwin()) {
+ if (TT.isOSDarwin()) {
if (is64Bit)
RM = Reloc::PIC_;
else
RM = Reloc::DynamicNoPIC;
- } else if (T.isOSWindows() && is64Bit)
+ } else if (TT.isOSWindows() && is64Bit)
RM = Reloc::PIC_;
else
RM = Reloc::Static;
@@ -186,13 +180,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
if (RM == Reloc::DynamicNoPIC) {
if (is64Bit)
RM = Reloc::PIC_;
- else if (!T.isOSDarwin())
+ else if (!TT.isOSDarwin())
RM = Reloc::Static;
}
// If we are on Darwin, disallow static relocation model in X86-64 mode, since
// the Mach-O file format doesn't support it.
- if (RM == Reloc::Static && T.isOSDarwin() && is64Bit)
+ if (RM == Reloc::Static && TT.isOSDarwin() && is64Bit)
RM = Reloc::PIC_;
// For static codegen, if we're not already set, use Small codegen.
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index c9479b62f7b6..9bfe999424fa 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -34,7 +34,7 @@ public:
if (std::error_code EC = SymNameOrErr.getError())
report_fatal_error(EC.message());
StringRef SymName = *SymNameOrErr;
- uint64_t SymAddr; SymI->getAddress(SymAddr);
+ uint64_t SymAddr = SymI->getValue();
any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
bool isPCRel = Obj->getAnyRelocationPCRel(RE);
@@ -90,8 +90,7 @@ public:
const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx);
symbol_iterator RSymI = Rel.getSymbol();
- uint64_t RSymAddr;
- RSymI->getAddress(RSymAddr);
+ uint64_t RSymAddr = RSymI->getValue();
ErrorOr<StringRef> RSymName = RSymI->getName();
if (std::error_code EC = RSymName.getError())
report_fatal_error(EC.message());
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index ef3318ba7580..cae865a40819 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -255,15 +255,13 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
- if (Imm & 0x88)
- return; // Not a shuffle
-
unsigned HalfSize = VT.getVectorNumElements() / 2;
for (unsigned l = 0; l != 2; ++l) {
- unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
+ unsigned HalfMask = Imm >> (l * 4);
+ unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
- ShuffleMask.push_back(i);
+ ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i);
}
}
@@ -431,4 +429,78 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
+
+void DecodeEXTRQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit extraction instruction as a shuffle if both the
+ // length and index work with whole bytes.
+ if (0 != (Len % 8) || 0 != (Idx % 8))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(16, SM_SentinelUndef);
+ return;
+ }
+
+  // Convert length and index to work with bytes.
+ Len /= 8;
+ Idx /= 8;
+
+ // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
+ // of the lower 64-bits. The upper 64-bits are undefined.
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + Idx);
+ for (int i = Len; i != 8; ++i)
+ ShuffleMask.push_back(SM_SentinelZero);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
+
+void DecodeINSERTQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit insertion instruction as a shuffle if both the
+ // length and index work with whole bytes.
+ if (0 != (Len % 8) || 0 != (Idx % 8))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(16, SM_SentinelUndef);
+ return;
+ }
+
+  // Convert length and index to work with bytes.
+ Len /= 8;
+ Idx /= 8;
+
+ // INSERTQ: Extract lowest Len bytes from lower half of second source and
+ // insert over first source starting at Idx byte. The upper 64-bits are
+ // undefined.
+ for (int i = 0; i != Idx; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + 16);
+ for (int i = Idx + Len; i != 8; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
+
} // llvm namespace
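
A quick worked example of the decoders above, with values chosen purely for illustration: DecodeEXTRQIMask(16, 8) reduces Len and Idx to 2 bytes and 1 byte and produces the v16i8 mask [1, 2, Z, Z, Z, Z, Z, Z, U, U, U, U, U, U, U, U], where Z is SM_SentinelZero and U is SM_SentinelUndef: bytes 1-2 of the source land in the low lanes, the rest of the low 64 bits are zeroed, and the upper 64 bits are undefined. DecodeINSERTQIMask(16, 8) likewise yields [0, 16, 17, 3, 4, 5, 6, 7, U, U, U, U, U, U, U, U], with indices of 16 and above selecting bytes from the second source.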
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 14b69434806e..3d10d18e860e 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,14 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
+void DecodeEXTRQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
+void DecodeINSERTQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 02645460b6a2..b4319c8bb04f 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -317,7 +317,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
}
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
- EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
@@ -608,7 +608,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
// Prepare for inserting code in the local-value area.
SavePoint SaveInsertPt = enterLocalValueArea();
- if (TLI.getPointerTy() == MVT::i64) {
+ if (TLI.getPointerTy(DL) == MVT::i64) {
Opc = X86::MOV64rm;
RC = &X86::GR64RegClass;
@@ -690,13 +690,14 @@ redo_gep:
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return X86SelectAddress(U->getOperand(0), AM);
break;
@@ -866,14 +867,14 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
case Instruction::IntToPtr:
// Look past no-op inttoptrs if its operand is in the same BB.
if (InMBB &&
- TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return X86SelectCallAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if its operand is in the same BB.
- if (InMBB &&
- TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return X86SelectCallAddress(U->getOperand(0), AM);
break;
}
@@ -1000,7 +1001,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -1031,7 +1032,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT SrcVT = TLI.getValueType(DL, RV->getType());
EVT DstVT = VA.getValVT();
// Special handling for extended integers.
if (SrcVT != DstVT) {
@@ -1300,7 +1301,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (!TLI.isTypeLegal(DstVT))
return false;
@@ -1309,7 +1310,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
return false;
// Handle zero-extension from i1 to i8, which is common.
- MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
+ MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT.SimpleTy == MVT::i1) {
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
@@ -1362,7 +1363,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
X86::CondCode CC;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
- EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
// Try to optimize or fold the cmp.
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
@@ -1802,7 +1803,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
@@ -2004,7 +2005,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
} else {
@@ -2166,8 +2167,8 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
// This code only handles truncation to byte.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
@@ -2416,7 +2417,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
case Intrinsic::stackprotector: {
// Emit code to store the stack guard onto the stack.
- EVT PtrTy = TLI.getPointerTy();
+ EVT PtrTy = TLI.getPointerTy(DL);
const Value *Op1 = II->getArgOperand(0); // The guard's value.
const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
@@ -2735,7 +2736,7 @@ bool X86FastISel::fastLowerArguments() {
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
- EVT ArgVT = TLI.getValueType(ArgTy);
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
if (!ArgVT.isSimple()) return false;
switch (ArgVT.getSimpleVT().SimpleTy) {
default: return false;
@@ -2772,7 +2773,7 @@ bool X86FastISel::fastLowerArguments() {
unsigned GPRIdx = 0;
unsigned FPRIdx = 0;
for (auto const &Arg : F->args()) {
- MVT VT = TLI.getSimpleValueType(Arg.getType());
+ MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned SrcReg;
switch (VT.SimpleTy) {
@@ -3108,7 +3109,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ !GV->isStrongDefinitionForLinker() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
@@ -3240,8 +3241,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
return X86SelectSIToFP(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (DstVT.bitsGT(SrcVT))
return X86SelectZExt(I);
if (DstVT.bitsLT(SrcVT))
@@ -3384,7 +3385,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- TM.getDataLayout()->getPointerSize(), Align);
+ DL.getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
@@ -3411,17 +3412,17 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (TM.getRelocationModel() == Reloc::Static &&
- TLI.getPointerTy() == MVT::i64) {
+ TLI.getPointerTy(DL) == MVT::i64) {
// The displacement code could be more than 32 bits away so we need to use
// an instruction with a 64 bit immediate
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
ResultReg)
.addGlobalAddress(GV);
} else {
- unsigned Opc = TLI.getPointerTy() == MVT::i32
- ? (Subtarget->isTarget64BitILP32()
- ? X86::LEA64_32r : X86::LEA32r)
- : X86::LEA64r;
+ unsigned Opc =
+ TLI.getPointerTy(DL) == MVT::i32
+ ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
}
@@ -3431,7 +3432,7 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
}
unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
@@ -3463,11 +3464,11 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
X86AddressMode AM;
if (!X86SelectAddress(C, AM))
return 0;
- unsigned Opc = TLI.getPointerTy() == MVT::i32
- ? (Subtarget->isTarget64BitILP32()
- ? X86::LEA64_32r : X86::LEA32r)
- : X86::LEA64r;
- const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned Opc =
+ TLI.getPointerTy(DL) == MVT::i32
+ ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 40b9c8a863a3..36a8cdbab55b 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -301,8 +301,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
bool FPIsUsed = false;
static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; i <= 6; ++i)
- if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
FPIsUsed = true;
break;
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 85c5b6499131..2a35c4cf31f3 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -90,7 +90,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
TRI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
+ MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.callsEHReturn() ||
MFI->hasStackMap() || MFI->hasPatchPoint());
@@ -967,13 +967,26 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
if (X86FI->getRestoreBasePointer()) {
- // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain.
+ // Stash value of base pointer. Saving RSP instead of EBP shortens
+ // dependence chain. Used by SjLj EH.
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
}
+
+ if (X86FI->getHasSEHFramePtrSave()) {
+ // Stash the value of the frame pointer relative to the base pointer for
+ // Win32 EH. This supports Win32 EH, which does the inverse of the above:
+ // it recovers the frame pointer from the base pointer rather than the
+ // other way around.
+ unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true,
+ getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex()))
+ .addReg(FramePtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
@@ -1412,9 +1425,11 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-void
-X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1436,7 +1451,7 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Spill the BasePtr if it's used.
if (TRI->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(TRI->getBaseRegister());
+ SavedRegs.set(TRI->getBaseRegister());
}
static bool
@@ -1667,8 +1682,6 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addImm(StackSize);
BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
- MF.getRegInfo().setPhysRegUsed(Reg10);
- MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index c274c8820149..495cfcd1c3f7 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -68,8 +68,8 @@ public:
void adjustForHiPEPrologue(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = nullptr) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6b23e62a2d35..d5351d25d6ed 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -246,8 +246,9 @@ namespace {
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
- ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
- TLI->getPointerTy())
+ ? CurDAG->getTargetFrameIndex(
+ AM.Base_FrameIndex,
+ TLI->getPointerTy(CurDAG->getDataLayout()))
: AM.Base_Reg;
Scale = getI8Imm(AM.Scale, DL);
Index = AM.IndexReg;
@@ -581,11 +582,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
void X86DAGToDAGISel::EmitSpecialCodeForMain() {
if (Subtarget->isTargetCygMing()) {
TargetLowering::ArgListTy Args;
+ auto &DL = CurDAG->getDataLayout();
TargetLowering::CallLoweringInfo CLI(*CurDAG);
CLI.setChain(CurDAG->getRoot())
.setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
- CurDAG->getExternalSymbol("__main", TLI->getPointerTy()),
+ CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
std::move(Args), 0);
const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
@@ -1025,7 +1027,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
switch (N.getOpcode()) {
default: break;
- case ISD::FRAME_ALLOC_RECOVER: {
+ case ISD::LOCAL_RECOVER: {
if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
// Use the symbol and don't prefix it.
@@ -1638,7 +1640,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
+ auto &DL = MF->getDataLayout();
+ return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
}
/// Atomic opcode table
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b16bd18aefaa..6e22ab30057c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -76,7 +76,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
: TargetLowering(TM), Subtarget(&STI) {
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
- TD = getDataLayout();
+ TD = TM.getDataLayout();
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
@@ -505,7 +505,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
@@ -825,6 +825,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
+
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
@@ -944,6 +949,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
}
+ setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1018,6 +1032,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v2i64, Custom);
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
@@ -1141,6 +1156,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
+ setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
+ setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
+ setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
+ setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
+ setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
+
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -1184,6 +1212,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SHL, MVT::v4i64, Custom);
setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i64, Custom);
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
@@ -1376,6 +1405,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
+ setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
+ setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
+ setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
+ setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
+
setOperationAction(ISD::ADD, MVT::v8i64, Legal);
setOperationAction(ISD::ADD, MVT::v16i32, Legal);
@@ -1473,6 +1511,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, MVT::v32i16, Legal);
setOperationAction(ISD::SUB, MVT::v64i8, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
+ setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
+ setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
@@ -1492,6 +1532,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
+ setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
+ setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
+ setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
+ setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
+
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@@ -1531,6 +1580,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::v4i32, Legal);
setOperationAction(ISD::SRA, MVT::v2i64, Custom);
setOperationAction(ISD::SRA, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
+ setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
+ setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
+ setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
+ setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
+ setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
+ setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
+ setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
}
// We want to custom lower some of our intrinsics.
@@ -1611,6 +1669,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BUILD_VECTOR);
@@ -1652,7 +1711,8 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
-EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
@@ -1724,10 +1784,11 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
-unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
+unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
- unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ unsigned TyAlign = DL.getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
@@ -1840,7 +1901,8 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
if (!Subtarget->is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
- return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
+ return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()));
return Table;
}
@@ -2032,7 +2094,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// false, then an sret argument may be implicitly inserted in the SelDAG. In
// either case FuncInfo->setSRetReturnReg() will have been called.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
- SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy());
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
+ getPointerTy(MF.getDataLayout()));
unsigned RetValReg
= (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
@@ -2041,7 +2104,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
- RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
+ RetOps.push_back(
+ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
RetOps[0] = Chain; // Update chain.
@@ -2288,11 +2352,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
- return DAG.getFrameIndex(FI, getPointerTy());
+ return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -2471,7 +2535,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (Ins[i].Flags.isSRet()) {
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
- MVT PtrTy = getPointerTy();
+ MVT PtrTy = getPointerTy(DAG.getDataLayout());
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
@@ -2499,7 +2563,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MachineModuleInfo &MMI = MF.getMMI();
const Function *WinEHParent = nullptr;
- if (IsWin64 && MMI.hasWinEHFuncInfo(Fn))
+ if (MMI.hasWinEHFuncInfo(Fn))
WinEHParent = MMI.getWinEHParent(Fn);
bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
@@ -2561,11 +2625,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (SDValue Val : LiveGPRs) {
- SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(Offset, dl));
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ RSFIN, DAG.getIntPtrConstant(Offset, dl));
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
@@ -2592,7 +2656,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- } else if (IsWinEHOutlined) {
+ } else if (IsWin64 && IsWinEHOutlined) {
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2605,8 +2669,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Store the second integer parameter (rdx) into rsp+16 relative to the
// stack pointer at the entry of the function.
- SDValue RSFIN =
- DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy());
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy(DAG.getDataLayout()));
unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
Chain = DAG.getStore(
@@ -2680,14 +2744,21 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setArgumentStackSize(StackSize);
if (IsWinEHParent) {
- int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
- SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
- MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
- SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
- Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
- MachinePointerInfo::getFixedStack(UnwindHelpFI),
- /*isVolatile=*/true,
- /*isNonTemporal=*/false, /*Alignment=*/0);
+ if (Is64Bit) {
+ int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
+ MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
+ SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
+ Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
+ MachinePointerInfo::getFixedStack(UnwindHelpFI),
+ /*isVolatile=*/true,
+ /*isNonTemporal=*/false, /*Alignment=*/0);
+ } else {
+ // Functions using Win32 EH are considered to have opaque SP adjustments
+ // to force local variables to be addressed from the frame or base
+ // pointers.
+ MFI->setHasOpaqueSPAdjustment(true);
+ }
}
return Chain;
@@ -2701,7 +2772,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
@@ -2718,7 +2790,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
bool IsTailCall, bool Is64Bit,
int FPDiff, SDLoc dl) const {
// Adjust the Return address stack slot.
- EVT VT = getPointerTy();
+ EVT VT = getPointerTy(DAG.getDataLayout());
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
@@ -2942,7 +3014,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
@@ -2955,8 +3027,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (!isTailCall) {
- RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
+ RegsToPass.push_back(std::make_pair(
+ unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()))));
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
@@ -3036,16 +3109,16 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
- FIN = DAG.getFrameIndex(FI, getPointerTy());
+ FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl,
- RegInfo->getStackRegister(),
- getPointerTy());
- Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+ StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ getPointerTy(DAG.getDataLayout()));
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
@@ -3064,8 +3137,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
- getPointerTy(), RegInfo->getSlotSize(),
- FPDiff, dl);
+ getPointerTy(DAG.getDataLayout()),
+ RegInfo->getSlotSize(), FPDiff, dl);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -3106,7 +3179,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ !GV->isStrongDefinitionForLinker() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
@@ -3123,17 +3196,18 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ExtraLoad = true;
}
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
- G->getOffset(), OpFlags);
+ Callee = DAG.getTargetGlobalAddress(
+ GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
// Add a wrapper if needed.
if (WrapperKind != ISD::DELETED_NODE)
- Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
+ getPointerTy(DAG.getDataLayout()), Callee);
// Add extra indirection if needed.
if (ExtraLoad)
- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(),
- false, false, false, 0);
+ Callee = DAG.getLoad(
+ getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -3152,8 +3226,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OpFlags = X86II::MO_DARWIN_STUB;
}
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
- OpFlags);
+ Callee = DAG.getTargetExternalSymbol(
+ S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
} else if (Subtarget->isTarget64BitILP32() &&
Callee->getValueType(0) == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
@@ -3184,9 +3258,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
+
+ // If this is an invoke in a 32-bit function using an MSVC personality, assume
+ // the function clobbers all registers. If an exception is thrown, the runtime
+ // will not restore CSRs.
+ // FIXME: Model this more precisely so that we can register allocate across
+ // the normal edge and spill and fill across the exceptional edge.
+ if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
+ const Function *CallerFn = MF.getFunction();
+ EHPersonality Pers =
+ CallerFn->hasPersonalityFn()
+ ? classifyEHPersonality(CallerFn->getPersonalityFn())
+ : EHPersonality::Unknown;
+ if (isMSVCEHPersonality(Pers))
+ Mask = RegInfo->getNoPreservedMask();
+ }
+
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
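As a rough illustration of the case the new mask handles (a hedged sketch, not part of the patch; names are placeholders), a 32-bit caller with an MSVC personality turns the call below into an invoke, and the register allocator must then assume every register is clobbered across it:

// Assumed target: i686-pc-windows-msvc; MayThrow is a hypothetical callee.
void MayThrow();
int Caller() {
  int Local = 42;
  try {
    MayThrow();   // lowered as an invoke; with an MSVC personality the call
                  // gets getNoPreservedMask(), so no callee-saved register is
                  // assumed preserved across it
  } catch (...) {
    return 0;
  }
  return Local;
}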
@@ -3650,7 +3739,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
FuncInfo->setRAIndex(ReturnAddrIndex);
}
- return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
@@ -3881,6 +3970,15 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasLZCNT();
}
+/// isUndefInRange - Return true if every element in Mask, beginning
+/// at position Pos and ending at Pos+Size (exclusive), is undef.
+static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
+ if (0 <= Mask[i])
+ return false;
+ return true;
+}
+
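A minimal usage sketch of the helper above (an assumption for illustration, called from the same file with made-up values):

// Lanes 2..3 of this mask are undef, lanes 0..1 are not.
static bool UpperHalfUndefExample() {
  const int Mask[4] = {0, 1, -1, -1};
  return isUndefInRange(Mask, /*Pos=*/2, /*Size=*/2);   // true
}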
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -4322,6 +4420,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// IsUnary to true if only uses one source. Note that this will set IsUnary for
/// shuffles which use a single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
+/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
@@ -4451,6 +4550,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
+ // Mask only contains negative index if an element is zero.
+ if (std::any_of(Mask.begin(), Mask.end(),
+ [](int M){ return M == SM_SentinelZero; }))
+ return false;
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
@@ -4764,7 +4867,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
MVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
- MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
+ MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
@@ -5082,7 +5185,8 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
assert(C && "Invalid constant type");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ SDValue CP =
+ DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
@@ -6857,6 +6961,136 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
return SDValue();
}
+/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
+static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ assert(!Zeroable.all() && "Fully zeroable shuffle mask");
+
+ int Size = Mask.size();
+ int HalfSize = Size / 2;
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ // Upper half must be undefined.
+ if (!isUndefInRange(Mask, HalfSize, HalfSize))
+ return SDValue();
+
+ // EXTRQ: Extract Len elements from lower half of source, starting at Idx.
+ // Remainder of lower half result is zero and upper half is all undef.
+ auto LowerAsEXTRQ = [&]() {
+ // Determine the extraction length from the part of the
+ // lower half that isn't zeroable.
+ int Len = HalfSize;
+ for (; Len >= 0; --Len)
+ if (!Zeroable[Len - 1])
+ break;
+ assert(Len > 0 && "Zeroable shuffle mask");
+
+ // Attempt to match first Len sequential elements from the lower half.
+ SDValue Src;
+ int Idx = -1;
+ for (int i = 0; i != Len; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ SDValue &V = (M < Size ? V1 : V2);
+ M = M % Size;
+
+ // All mask elements must be in the lower half.
+ if (M > HalfSize)
+ return SDValue();
+
+ if (Idx < 0 || (Src == V && Idx == (M - i))) {
+ Src = V;
+ Idx = M - i;
+ continue;
+ }
+ return SDValue();
+ }
+
+ if (Idx < 0)
+ return SDValue();
+
+ assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
+ int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ };
+
+ if (SDValue ExtrQ = LowerAsEXTRQ())
+ return ExtrQ;
+
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source, starting at Idx.
+ // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
+ auto LowerAsInsertQ = [&]() {
+ for (int Idx = 0; Idx != HalfSize; ++Idx) {
+ SDValue Base;
+
+ // Attempt to match first source from mask before insertion point.
+ if (isUndefInRange(Mask, 0, Idx)) {
+ /* EMPTY */
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
+ Base = V1;
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
+ Base = V2;
+ } else {
+ continue;
+ }
+
+ // Extend the extraction length looking to match both the insertion of
+ // the second source and the remaining elements of the first.
+ for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
+ SDValue Insert;
+ int Len = Hi - Idx;
+
+ // Match insertion.
+ if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
+ Insert = V1;
+ } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
+ Insert = V2;
+ } else {
+ continue;
+ }
+
+ // Match the remaining elements of the lower half.
+ if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
+ /* EMPTY */
+ } else if ((!Base || (Base == V1)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
+ Base = V1;
+ } else if ((!Base || (Base == V2)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
+ Size + Hi)) {
+ Base = V2;
+ } else {
+ continue;
+ }
+
+ // We may not have a base (first source) - this can safely be undefined.
+ if (!Base)
+ Base = DAG.getUNDEF(VT);
+
+ int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ }
+ }
+
+ return SDValue();
+ };
+
+ if (SDValue InsertQ = LowerAsInsertQ())
+ return InsertQ;
+
+ return SDValue();
+}
+
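To make the immediate encoding above concrete, a small arithmetic sketch (illustrative values only, not part of the patch): for a v8i16 shuffle that places source elements 1..2 in lanes 0..1 and leaves the rest of the lower half undef or zero, LowerAsEXTRQ ends up with Len = 2 and Idx = 1:

int EltBits = 16;                      // v8i16 scalar size
int Len = 2, Idx = 1;                  // two elements starting at element 1
int BitLen = (Len * EltBits) & 0x3f;   // 32
int BitIdx = (Idx * EltBits) & 0x3f;   // 16
// EXTRQI(V1, 32, 16) then yields { V1[1], V1[2], 0, 0, undef x4 } in v8i16 terms.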
/// \brief Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
@@ -6864,7 +7098,7 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
/// features of the subtarget.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
int NumElements = VT.getVectorNumElements();
int EltBits = VT.getScalarSizeInBits();
@@ -6901,6 +7135,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
}
+ // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
+ // to 64-bits.
+ if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
+ assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector width!");
+
+ SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(0, DL, MVT::i8)));
+ if (isUndefInRange(Mask, NumElements/2, NumElements/2))
+ return DAG.getNode(ISD::BITCAST, DL, VT, Lo);
+
+ SDValue Hi =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(EltBits, DL, MVT::i8)));
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
+ }
+
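A scalar model of what the EXTRQI pair plus UNPCKL above compute for the v8i16, Scale == 4 case (a sketch, not part of the patch):

#include <cstdint>
// Lo = EXTRQI(V, BitLen=16, BitIdx=0)  zero-extends lane 0 into a low i64;
// Hi = EXTRQI(V, BitLen=16, BitIdx=16) does the same for lane 1;
// UNPCKL then interleaves the two i64 results.
static void ZExtFirstTwoLanes(uint64_t Out[2], const uint16_t In[8]) {
  Out[0] = In[0];
  Out[1] = In[1];
}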
// If this would require more than 2 unpack instructions to expand, use
// pshufb when available. We can only use more than 2 unpack instructions
// when zero extending i8 elements which also makes it easier to use pshufb.
@@ -6991,7 +7247,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
return SDValue();
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- DL, VT, Scale, AnyExt, InputV, Subtarget, DAG);
+ DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
@@ -7166,9 +7422,9 @@ static SDValue lowerVectorShuffleAsElementInsertion(
V2 = DAG.getBitcast(MVT::v2i64, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
- DAG.getConstant(
- V2Index * EltVT.getSizeInBits()/8, DL,
- DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
+ DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL,
+ DAG.getTargetLoweringInfo().getScalarShiftAmountTy(
+ DAG.getDataLayout(), VT)));
V2 = DAG.getBitcast(VT, V2);
}
}
@@ -8518,6 +8774,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, DAG))
return Shift;
+ // See if we can use SSE4A Extraction / Insertion.
+ if (Subtarget->hasSSE4A())
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return V;
+
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2,
@@ -8670,6 +8931,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return ZExt;
+ // See if we can use SSE4A Extraction / Insertion.
+ if (Subtarget->hasSSE4A())
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return V;
+
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; });
@@ -10613,12 +10879,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MaskEltVT.getSizeInBits());
Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
- getZeroVector(MaskVT, Subtarget, DAG, dl),
- Idx, DAG.getConstant(0, dl, getPointerTy()));
+ getZeroVector(MaskVT, Subtarget, DAG, dl), Idx,
+ DAG.getConstant(0, dl, PtrVT));
SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(),
- Perm, DAG.getConstant(0, dl, getPointerTy()));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm,
+ DAG.getConstant(0, dl, PtrVT));
}
return SDValue();
}
@@ -11009,17 +11276,16 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
else if (Subtarget->isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
- SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
- CP->getAlignment(),
- CP->getOffset(), OpFlag);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetConstantPool(
+ CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
- Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
- Result);
+ Result =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
return Result;
@@ -11042,17 +11308,16 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
else if (Subtarget->isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
- SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
- OpFlag);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
SDLoc DL(JT);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
- Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
- Result);
+ Result =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
return Result;
}
@@ -11080,24 +11345,24 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
OpFlag = X86II::MO_DARWIN_NONLAZY;
}
- SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);
SDLoc DL(Op);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
- Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
- Result);
+ Result =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
// For symbols that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlag))
- Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
@@ -11112,20 +11377,19 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
- SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset,
- OpFlags);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
- Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
else
- Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
- Result);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
return Result;
@@ -11139,40 +11403,40 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, DAG.getTarget());
CodeModel::Model M = DAG.getTarget().getCodeModel();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
Offset = 0;
} else {
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
}
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
- Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
else
- Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
- Result);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
// For globals that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlags))
- Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
- DAG.getConstant(Offset, dl, getPointerTy()));
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result,
+ DAG.getConstant(Offset, dl, PtrVT));
return Result;
}
@@ -11336,22 +11600,23 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Subtarget->isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget->is64Bit())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
- return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
case TLSModel::LocalDynamic:
- return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
+ return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT,
Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
- return LowerToTLSExecModel(
- GA, DAG, getPointerTy(), model, Subtarget->is64Bit(),
- DAG.getTarget().getRelocationModel() == Reloc::PIC_);
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(),
+ DAG.getTarget().getRelocationModel() ==
+ Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -11374,13 +11639,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
GA->getValueType(0),
GA->getOffset(), OpFlag);
- SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC32, the address is actually $g + Offset.
if (PIC32)
- Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
// Lowering the machine isd will make sure everything is in the right
@@ -11397,8 +11661,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
- return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
- Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
}
if (Subtarget->isTargetKnownWindowsMSVC() ||
@@ -11426,50 +11689,50 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue TlsArray =
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58, dl)
- : (Subtarget->isTargetWindowsGNU()
- ? DAG.getIntPtrConstant(0x2C, dl)
- : DAG.getExternalSymbol("_tls_array", getPointerTy()));
+ SDValue TlsArray = Subtarget->is64Bit()
+ ? DAG.getIntPtrConstant(0x58, dl)
+ : (Subtarget->isTargetWindowsGNU()
+ ? DAG.getIntPtrConstant(0x2C, dl)
+ : DAG.getExternalSymbol("_tls_array", PtrVT));
SDValue ThreadPointer =
- DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
- MachinePointerInfo(Ptr), false, false, false, 0);
+ DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr), false,
+ false, false, 0);
SDValue res;
if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {
res = ThreadPointer;
} else {
// Load the _tls_index variable
- SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
+ SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
if (Subtarget->is64Bit())
- IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain, IDX,
+ IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
MachinePointerInfo(), MVT::i32, false, false,
false, 0);
else
- IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
- false, false, false, 0);
+ IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo(), false,
+ false, false, 0);
- SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), dl,
- getPointerTy());
- IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
+ auto &DL = DAG.getDataLayout();
+ SDValue Scale =
+ DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, PtrVT);
+ IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale);
- res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
+ res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX);
}
- res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
- false, false, false, 0);
+ res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo(), false, false,
+ false, 0);
// Get the offset of start of .tls section
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), X86II::MO_SECREL);
- SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA);
+ SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
- return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset);
}
llvm_unreachable("TLS not implemented for this target.");
@@ -11564,8 +11827,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
@@ -11614,7 +11878,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
@@ -11656,7 +11921,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
- SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
SmallVector<Constant*,2> CV1;
CV1.push_back(
@@ -11666,7 +11932,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
- SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
@@ -11882,6 +12148,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Op.getValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
@@ -11904,9 +12171,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
- SDValue WordOff = DAG.getConstant(4, dl, getPointerTy());
- SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
- getPointerTy(), StackSlot, WordOff);
+ SDValue WordOff = DAG.getConstant(4, dl, PtrVT);
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo(),
false, false, 0);
@@ -11940,22 +12206,20 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
- SDValue SignSet = DAG.getSetCC(dl,
- getSetCCResultType(*DAG.getContext(), MVT::i64),
- Op.getOperand(0),
- DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
+ SDValue SignSet = DAG.getSetCC(
+ dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
- ConstantInt::get(*DAG.getContext(), FF.zext(64)),
- getPointerTy());
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT);
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
SDValue Four = DAG.getIntPtrConstant(4, dl);
SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
Zero, Four);
- FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
@@ -11974,6 +12238,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
@@ -11998,7 +12263,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
if (!IsSigned && isIntegerTypeFTOL(DstTy))
@@ -12032,7 +12297,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
- StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
}
MachineMemOperand *MMO =
@@ -12403,7 +12668,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
Constant *C = ConstantInt::get(*Context, MaskElt);
C = ConstantVector::getSplat(NumElts, C);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
+ SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
@@ -12462,7 +12727,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
CV[0] = ConstantFP::get(*Context,
APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1)));
Constant *C = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
+ auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
@@ -12483,7 +12749,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1)));
}
C = ConstantVector::get(CV);
- CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
+ CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
@@ -13352,8 +13618,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (hasMinMax) {
switch (SetCCOpcode) {
default: break;
- case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break;
- case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break;
+ case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
+ case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}
if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
@@ -14172,8 +14438,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment =
- DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
for (unsigned i = 0; i < NumLoads; ++i) {
@@ -14613,7 +14879,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
- EVT SPTy = getPointerTy();
+ MVT SPTy = getPointerTy(DAG.getDataLayout());
if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -14630,8 +14896,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
"have nested arguments.");
}
- const TargetRegisterClass *AddrRegClass =
- getRegClassFor(getPointerTy());
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
@@ -14666,6 +14931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -14674,8 +14940,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
- getPointerTy());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
}
@@ -14695,8 +14960,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(4, DL));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL,
MVT::i32),
@@ -14704,20 +14968,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(4, DL));
- SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
- getPointerTy());
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
+ SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
MachinePointerInfo(SV, 8),
false, false, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(8, DL));
- SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy());
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL));
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
MachinePointerInfo(SV, 16), false, false, 0);
MemOps.push_back(Store);
@@ -14739,7 +14999,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+ uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
@@ -14768,7 +15028,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32),
DAG.getConstant(ArgMode, dl, MVT::i8),
DAG.getConstant(Align, dl, MVT::i32)};
- SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
VTs, InstOps, MVT::i64,
MachinePointerInfo(SV),
@@ -14995,6 +15255,20 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
}
+static int getSEHRegistrationNodeSize(const Function *Fn) {
+ if (!Fn->hasPersonalityFn())
+ report_fatal_error(
+ "querying registration node size for function without personality");
+ // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
+ // WinEHStatePass for the full struct definition.
+ switch (classifyEHPersonality(Fn->getPersonalityFn())) {
+ case EHPersonality::MSVC_X86SEH: return 24;
+ case EHPersonality::MSVC_CXX: return 16;
+ default: break;
+ }
+ report_fatal_error("can only recover FP for MSVC EH personality functions");
+}
+
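A hedged sketch (the helper name is invented here) of the arithmetic that consumes this size in recoverFramePointer below:

#include <cstdint>
// RegNodeBase = EntryEBP - RegNodeSize; ParentFP = RegNodeBase - RegNodeFrameOffset.
static uint64_t ComputeParentFP(uint64_t EntryEBP, int RegNodeSize,
                                uint64_t RegNodeFrameOffset) {
  uint64_t RegNodeBase = EntryEBP - RegNodeSize;   // 24 for SEH, 16 for C++ EH
  return RegNodeBase - RegNodeFrameOffset;
}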
/// When the 32-bit MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception, we
/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
@@ -15009,7 +15283,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
SDLoc dl;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// It's possible that the parent function no longer has a personality function
// if the exceptional code was optimized away, in which case we just return
@@ -15017,15 +15291,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
if (!Fn->hasPersonalityFn())
return EntryEBP;
- // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
- // WinEHStatePass for the full struct definition.
- int RegNodeSize;
- switch (classifyEHPersonality(Fn->getPersonalityFn())) {
- default:
- report_fatal_error("can only recover FP for MSVC EH personality functions");
- case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break;
- case EHPersonality::MSVC_CXX: RegNodeSize = 16; break;
- }
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn);
// Get an MCSymbol that will ultimately resolve to the frame offset of the EH
// registration.
@@ -15034,7 +15300,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
GlobalValue::getRealLinkageName(Fn->getName()));
SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
SDValue RegNodeFrameOffset =
- DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal);
+ DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
// RegNodeBase = EntryEBP - RegNodeSize
// ParentFP = RegNodeBase - RegNodeFrameOffset
@@ -15059,6 +15325,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
+ case INTR_TYPE_4OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
@@ -15143,7 +15412,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Rnd;
if (Op.getNumOperands() == 6)
Rnd = Op.getOperand(5);
- else
+ else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Rnd),
@@ -15173,7 +15442,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case VPERM_3OP_MASKZ:
+ case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
@@ -15499,6 +15768,19 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
"llvm.x86.seh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
+
+ case Intrinsic::localaddress: {
+ // Returns one of the stack, base, or frame pointer registers, depending on
+ // which is used to reference local variables.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned Reg;
+ if (RegInfo->hasBasePointer(MF))
+ Reg = RegInfo->getBaseRegister();
+ else // This function handles the SP or FP case.
+ Reg = RegInfo->getPtrSizedFrameRegister(MF);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+ }
}
}
@@ -15712,34 +15994,60 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
+ const Function *Fn = MF.getFunction();
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
+ assert(Subtarget->getFrameLowering()->hasFP(MF) &&
+ "using llvm.x86.seh.restoreframe requires a frame pointer");
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT VT = TLI.getPointerTy();
+ MVT VT = TLI.getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
unsigned SPReg = RegInfo->getStackRegister();
+ unsigned SlotSize = RegInfo->getSlotSize();
// Get incoming EBP.
SDValue IncomingEBP =
DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
- // Load [EBP-24] into SP.
- SDValue SPAddr =
- DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT));
+ // SP is saved in the first field of every registration node, so load
+ // [EBP-RegNodeSize] into SP.
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn);
+ SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP,
+ DAG.getConstant(-RegNodeSize, dl, VT));
SDValue NewSP =
DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false,
false, VT.getScalarSizeInBits() / 8);
Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
- // FIXME: Restore the base pointer in case of stack realignment!
+ if (!RegInfo->needsStackRealignment(MF)) {
+ // Adjust EBP to point back to the original frame position.
+ SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP);
+ Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
+ } else {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "functions with Win32 EH must use frame or base pointer register");
+
+ // Reload the base pointer (ESI) with the adjusted incoming EBP.
+ SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP);
+ Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP);
+
+ // Reload the spilled EBP value, now that the stack and base pointers are
+ // set up.
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ X86FI->setHasSEHFramePtrSave(true);
+ int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize);
+ X86FI->setSEHFramePtrSaveIndex(FI);
+ SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT),
+ MachinePointerInfo(), false, false, false,
+ VT.getScalarSizeInBits() / 8);
+ Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP);
+ }
- // Adjust EBP to point back to the original frame position.
- SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP);
- Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
return Chain;
}
@@ -15910,7 +16218,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -15969,14 +16277,36 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned X86TargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const MachineFunction &MF = DAG.getMachineFunction();
+
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("esp", X86::ESP)
.Case("rsp", X86::RSP)
+ .Case("ebp", X86::EBP)
+ .Case("rbp", X86::RBP)
.Default(0);
+
+ if (Reg == X86::EBP || Reg == X86::RBP) {
+ if (!TFI.hasFP(MF))
+ report_fatal_error("register " + StringRef(RegName) +
+ " is allocatable: function has no frame pointer");
+#ifndef NDEBUG
+ else {
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg =
+ RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
+ assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
+ "Invalid Frame Register!");
+ }
+#endif
+ }
+
if (Reg)
return Reg;
+
report_fatal_error("Invalid register name global variable");
}
@@ -15992,7 +16322,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Handler = Op.getOperand(2);
SDLoc dl (Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
@@ -16211,7 +16541,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue StackSlot =
+ DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
@@ -16572,7 +16903,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
@@ -16642,9 +16973,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
// If we have a signed multiply but no PMULDQ fix up the high parts of a
// unsigned multiply.
if (IsSigned && !Subtarget->hasSSE41()) {
- SDValue ShAmt =
- DAG.getConstant(31, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(VT));
+ SDValue ShAmt = DAG.getConstant(
+ 31, dl,
+ DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
@@ -16717,6 +17048,38 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
+ auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
+ MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
+ SDValue Ex = DAG.getBitcast(ExVT, R);
+
+ if (ShiftAmt >= 32) {
+ // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32.
+ SDValue Upper =
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
+ SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
+ ShiftAmt - 32, DAG);
+ if (VT == MVT::v2i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
+ if (VT == MVT::v4i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
+ {9, 1, 11, 3, 13, 5, 15, 7});
+ } else {
+      // SRA upper i32, SRL whole i64 and select lower i32.
+ SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
+ ShiftAmt, DAG);
+ SDValue Lower =
+ getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
+ Lower = DAG.getBitcast(ExVT, Lower);
+ if (VT == MVT::v2i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
+ if (VT == MVT::v4i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
+ {8, 1, 10, 3, 12, 5, 14, 7});
+ }
+ return DAG.getBitcast(VT, Ex);
+ };
+
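A scalar model of the lambda above (a sketch assuming each i64 lane is split into i32 halves exactly as the shuffles do; shift amounts are expected in [1, 63]):

#include <cstdint>
static int64_t SRA64ViaI32(int64_t X, unsigned ShiftAmt) {
  int32_t Hi = int32_t(uint64_t(X) >> 32);
  if (ShiftAmt >= 32) {
    // Splat the sign into the new upper half; SRA the old upper half by
    // ShiftAmt - 32 to form the new lower half.
    uint32_t NewLo = uint32_t(Hi >> (ShiftAmt - 32));
    uint32_t NewHi = uint32_t(Hi >> 31);
    return int64_t((uint64_t(NewHi) << 32) | NewLo);
  }
  // SRA the upper half, SRL the whole 64-bit value, and take the new lower
  // half from the logical shift.
  uint32_t NewLo = uint32_t(uint64_t(X) >> ShiftAmt);
  uint32_t NewHi = uint32_t(Hi >> ShiftAmt);
  return int64_t((uint64_t(NewHi) << 32) | NewLo);
}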
// Optimize shl/srl/sra with constant shift amount.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
@@ -16725,6 +17088,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
+ // i64 SRA needs to be performed as partial shifts.
+ if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Op.getOpcode() == ISD::SRA)
+ return ArithmeticShiftRight64(ShiftAmt);
+
if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
@@ -16808,7 +17176,12 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (ShAmt != ShiftAmt)
return SDValue();
}
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
+
+ if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
+
+ if (Op.getOpcode() == ISD::SRA)
+ return ArithmeticShiftRight64(ShiftAmt);
}
return SDValue();
@@ -16890,7 +17263,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
}
- return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
+
+ if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
+ return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
}
return SDValue();
}
@@ -17042,6 +17417,53 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
}
+ // v4i32 Non Uniform Shifts.
+ // If the shift amount is constant we can shift each lane using the SSE2
+ // immediate shifts, else we need to zero-extend each lane to the lower i64
+ // and shift using the SSE2 variable shifts.
+ // The separate results can then be blended together.
+ if (VT == MVT::v4i32) {
+ unsigned Opc = Op.getOpcode();
+ SDValue Amt0, Amt1, Amt2, Amt3;
+ if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
+ } else {
+ // ISD::SHL is handled above but we include it here for completeness.
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown target vector shift node");
+ case ISD::SHL:
+ Opc = X86ISD::VSHL;
+ break;
+ case ISD::SRL:
+ Opc = X86ISD::VSRL;
+ break;
+ case ISD::SRA:
+ Opc = X86ISD::VSRA;
+ break;
+ }
+ // The SSE2 shifts use the lower i64 as the same shift amount for
+ // all lanes and the upper i64 is ignored. These shuffle masks
+      // optimally zero-extend each lane on SSE2/SSE41/AVX targets.
+ SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
+ }
+
+ SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
+ SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2);
+ SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3);
+ SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
+ SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
+ return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
+ }
+
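A scalar model of the per-lane result the four shifts and three blends above assemble (a sketch, not part of the patch; shift amounts assumed to be in [0, 31]). The shuffle masks {0,-1,6,-1}, {-1,1,-1,7} and {0,5,2,7} keep exactly lane i of Ri:

#include <cstdint>
// Ri stands for the whole vector R shifted by Amt[i]; only lane i of each Ri
// survives the blends, so the final result is a per-lane shift.
static void ShiftNonUniformV4i32(uint32_t Res[4], const uint32_t R[4],
                                 const uint32_t Amt[4]) {
  for (int i = 0; i != 4; ++i)
    Res[i] = R[i] >> Amt[i];   // SRL shown; the SHL/SRA cases are analogous
}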
if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
@@ -17944,7 +18366,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
// the results are returned via SRet in memory.
const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
+ SDValue Callee =
+ DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64
? (Type*)StructType::get(ArgTy, ArgTy, nullptr)
@@ -18443,10 +18866,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
- case X86ISD::UMAX: return "X86ISD::UMAX";
- case X86ISD::UMIN: return "X86ISD::UMIN";
- case X86ISD::SMAX: return "X86ISD::SMAX";
- case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
@@ -18456,6 +18875,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
+ case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
@@ -18478,6 +18899,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
+ case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -18594,16 +19016,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
+ case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
+ case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND";
+ case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND";
}
return nullptr;
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
-bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
@@ -19555,7 +19980,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
- getRegClassFor(getPointerTy());
+ getRegClassFor(getPointerTy(MF->getDataLayout()));
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
@@ -19750,7 +20175,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MemOpndSlot = CurOp;
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -19882,7 +20307,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -21377,7 +21802,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
EltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
@@ -21513,14 +21938,15 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector);
- EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
+ auto &DL = DAG.getDataLayout();
+ EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL);
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(0, dl, VecIdxTy));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(1, dl, VecIdxTy));
- SDValue ShAmt = DAG.getConstant(32, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64));
+ SDValue ShAmt = DAG.getConstant(
+ 32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL));
Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt));
@@ -21539,10 +21965,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Replace each use (extract) with a load of the appropriate element.
for (unsigned i = 0; i < 4; ++i) {
uint64_t Offset = EltSize * i;
- SDValue OffsetVal = DAG.getConstant(Offset, dl, TLI.getPointerTy());
+ auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT);
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
- StackPtr, OffsetVal);
+ SDValue ScalarAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal);
// Load the scalar.
Vals[i] = DAG.getLoad(ElementType, dl, Ch,
@@ -21622,16 +22049,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
+ Opc = hasUnsigned ? ISD::UMIN : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
+ Opc = hasUnsigned ? ISD::UMAX : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- Opc = hasSigned ? X86ISD::SMIN : 0u; break;
+ Opc = hasSigned ? ISD::SMIN : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- Opc = hasSigned ? X86ISD::SMAX : 0u; break;
+ Opc = hasSigned ? ISD::SMAX : 0; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
@@ -21640,16 +22067,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
+ Opc = hasUnsigned ? ISD::UMAX : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
+ Opc = hasUnsigned ? ISD::UMIN : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- Opc = hasSigned ? X86ISD::SMAX : 0u; break;
+ Opc = hasSigned ? ISD::SMAX : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- Opc = hasSigned ? X86ISD::SMIN : 0u; break;
+ Opc = hasSigned ? ISD::SMIN : 0; break;
}
}
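
(Reading aid, not part of the patch: the hunks above drop the X86-specific min/max nodes and let matchIntegerMINMAX emit the generic ISD opcodes instead. For the "x CC y ? x : y" arm order the mapping boils down to the sketch below; the helper name is hypothetical, and the reversed-arm case simply swaps MIN and MAX.)

    // Illustrative condensation of the mapping above; ccToMinMax is not a
    // function in the patch.
    static unsigned ccToMinMax(ISD::CondCode CC, bool HasSigned, bool HasUnsigned) {
      switch (CC) {
      case ISD::SETULT: case ISD::SETULE: return HasUnsigned ? ISD::UMIN : 0;
      case ISD::SETUGT: case ISD::SETUGE: return HasUnsigned ? ISD::UMAX : 0;
      case ISD::SETLT:  case ISD::SETLE:  return HasSigned   ? ISD::SMIN : 0;
      case ISD::SETGT:  case ISD::SETGE:  return HasSigned   ? ISD::SMAX : 0;
      default:                            return 0;
      }
    }
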
@@ -22106,7 +22533,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Check if the selector will be produced by CMPP*/PCMP*
Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted
- TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT) {
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
+ CondVT) {
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
@@ -22826,7 +23254,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
// We shift all of the values by one. In many cases we do not have
// hardware support for this operation. This is better expressed as an ADD
// of two values.
- if (N1SplatC->getZExtValue() == 1)
+ if (N1SplatC->getAPIntValue() == 1)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
@@ -23478,7 +23906,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(16, dl, TLI.getPointerTy());
+ SDValue Increment =
+ DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout()));
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems/2);
@@ -23687,7 +24116,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
- SDValue Stride = DAG.getConstant(16, dl, TLI.getPointerTy());
+ SDValue Stride =
+ DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout()));
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
@@ -23760,8 +24190,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl,
- TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue Ptr = St->getBasePtr();
// Perform one or more big stores into memory.
@@ -24659,6 +25089,31 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = Op0.getValueType();
+ EVT InSVT = InVT.getScalarType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
+ // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
+ if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+
+ if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT))
+ return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
+
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
+ }
+
+ return SDValue();
+}
+
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// First try to optimize away the conversion entirely when it's
@@ -24913,6 +25368,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, Subtarget);
+ case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -25135,7 +25591,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
(matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
- const std::string &ConstraintsStr = IA->getConstraintString();
+ StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
@@ -25149,7 +25605,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
- const std::string &ConstraintsStr = IA->getConstraintString();
+ StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
@@ -25176,7 +25632,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
-X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+X86TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'R':
@@ -25508,7 +25964,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
@@ -25717,8 +26173,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return Res;
}
-int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty,
+int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
@@ -25738,7 +26194,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
- if (isLegalAddressingMode(AM, Ty, AS))
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
return AM.Scale != 0;
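
(A pattern running through the X86ISelLowering.cpp hunks above: pointer, vector-index and shift-amount type queries now take the module DataLayout explicitly, usually via DAG.getDataLayout(). A minimal sketch of the new call shape follows; the helper name is illustrative and not from the patch.)

    // Illustrative only: pointer-typed address arithmetic with the DataLayout
    // passed explicitly, mirroring the getPointerTy(DAG.getDataLayout()) calls
    // introduced above.
    static SDValue addOffset(SelectionDAG &DAG, const TargetLowering &TLI,
                             SDValue Base, uint64_t Offset, SDLoc dl) {
      auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // was TLI.getPointerTy()
      SDValue Off = DAG.getConstant(Offset, dl, PtrVT);
      return DAG.getNode(ISD::ADD, dl, PtrVT, Base, Off);
    }
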
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 17660891635c..723d5304495c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -233,12 +233,6 @@ namespace llvm {
/// Floating point horizontal sub.
FHSUB,
- /// Unsigned integer max and min.
- UMAX, UMIN,
-
- /// Signed integer max and min.
- SMAX, SMIN,
-
// Integer absolute value
ABS,
@@ -298,8 +292,8 @@ namespace llvm {
// Vector FP round.
VFPROUND,
- // Vector signed integer to double.
- CVTDQ2PD,
+ // Vector signed/unsigned integer to double.
+ CVTDQ2PD, CVTUDQ2PD,
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -400,10 +394,15 @@ namespace llvm {
VINSERT,
VEXTRACT,
+ /// SSE4A Extraction and Insertion.
+ EXTRQI, INSERTQI,
+
// Vector multiply packed unsigned doubleword integers
PMULUDQ,
// Vector multiply packed signed doubleword integers
PMULDQ,
+      // Vector multiply packed unsigned integers with round and scale
+ MULHRS,
// FMA nodes
FMADD,
@@ -429,6 +428,9 @@ namespace llvm {
      // with rounding mode
SINT_TO_FP_RND,
UINT_TO_FP_RND,
+
+ // Vector float/double to signed/unsigned integer.
+ FP_TO_SINT_RND, FP_TO_UINT_RND,
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
@@ -599,7 +601,9 @@ namespace llvm {
unsigned getJumpTableEncoding() const override;
bool useSoftFloat() const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i8;
+ }
const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
@@ -617,7 +621,8 @@ namespace llvm {
/// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
- unsigned getByValTypeAlignment(Type *Ty) const override;
+ unsigned getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const override;
/// Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
@@ -685,7 +690,8 @@ namespace llvm {
bool isCheapToSpeculateCtlz() const override;
/// Return the value type to use for ISD::SETCC.
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
/// Determine which of the bits specified in Mask are known to be either
/// zero or one and return them in the KnownZero/KnownOne bitsets.
@@ -707,8 +713,7 @@ namespace llvm {
bool ExpandInlineAsm(CallInst *CI) const override;
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -726,8 +731,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "i")
return InlineAsm::Constraint_i;
else if (ConstraintCode == "o")
@@ -745,13 +750,12 @@ namespace llvm {
/// error, this returns a register number of 0.
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
@@ -770,7 +774,7 @@ namespace llvm {
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
- int getScalingFactorCost(const AddrMode &AM, Type *Ty,
+ int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
bool isVectorShiftByScalarCheap(Type *Ty) const override;
@@ -872,7 +876,8 @@ namespace llvm {
return nullptr; // nothing to do, move along.
}
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
/// This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index b309b8210851..faa91500b181 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -3136,6 +3136,12 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
+defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P,
+ HasBWI, 1>;
+defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>;
+defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
SSE_INTALU_ITINS_P, HasBWI, 1>;
@@ -3230,32 +3236,32 @@ let Predicates = [HasBWI] in {
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
}
-defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
+defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax,
+defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax,
+defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax,
+defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax,
+defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax,
+defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin,
+defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin,
+defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin,
+defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin,
+defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
+defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
+defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
//===----------------------------------------------------------------------===//
@@ -4035,7 +4041,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
}
@@ -4394,16 +4400,16 @@ def : Pat<(f64 (sint_to_fp GR32:$src)),
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
-defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32,
+defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32,
v4f32x_info, i32mem, loadi32,
"cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info,
+defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info,
i32mem, loadi32, "cvtusi2sd{l}">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
@@ -4604,117 +4610,389 @@ def : Pat<(extloadf32 addr:$src),
def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
Requires<[HasAVX512]>;
-multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
- Domain d> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
- def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
- !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [], d>, EVEX, EVEX_B, EVEX_RC;
- let mayLoad = 1 in
- def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
-} // hasSideEffects = 0
+//===----------------------------------------------------------------------===//
+// AVX-512 Vector convert from signed/unsigned integer to float/double
+// and from float/double to signed/unsigned integer
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNode,
+ string Broadcast = _.BroadcastStr,
+ string Alias = ""> {
+
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX;
+
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.MemOp:$src), OpcodeStr#Alias, "$src", "$src",
+ (_.VT (OpNode (_Src.VT
+ (bitconvert (_Src.LdFrag addr:$src)))))>, EVEX;
+
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.MemOp:$src), OpcodeStr,
+ "${src}"##Broadcast, "${src}"##Broadcast,
+ (_.VT (OpNode (_Src.VT
+ (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
+ ))>, EVEX, EVEX_B;
+}
+// Conversion with SAE - suppress all exceptions
+multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src), OpcodeStr,
+ "{sae}, $src", "$src, {sae}",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
+ (i32 FROUND_NO_EXC)))>,
+ EVEX, EVEX_B;
}
-multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
- Domain d> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
- let mayLoad = 1 in
- def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
-} // hasSideEffects = 0
+// Conversion with rounding control (RC)
+multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src", "$src, $rc",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
+ EVEX, EVEX_B, EVEX_RC;
}
-defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
- loadv8f64, f512mem, v8f32, v8f64,
- SSEPackedSingle>, EVEX_V512, VEX_W, PD,
- EVEX_CD8<64, CD8VF>;
+// Extend Float to Double
+multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fextend>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
+ X86vfpextRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
+ X86vfpext, "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fextend>,
+ EVEX_V256;
+ }
+}
+
+// Truncate Double to Float
+multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fround>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ X86vfproundRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
+ X86vfpround, "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fround,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
+ VEX_W, PD, EVEX_CD8<64, CD8VF>;
+defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
+ PS, EVEX_CD8<32, CD8VH>;
-defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
- loadv4f64, f256mem, v8f64, v8f32,
- SSEPackedDouble>, EVEX_V512, PS,
- EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
-def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
- (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
- (VCVTPD2PSZrr VR512:$src)>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v4f64 (extloadv4f32 addr:$src)),
+ (VCVTPS2PDZ256rm addr:$src)>;
+}
-def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
- (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
- (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
+// Convert Signed/Unsigned Doubleword to Double
+multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNode128> {
+ // No rounding in this op
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>,
+ EVEX_V512;
-//===----------------------------------------------------------------------===//
-// AVX-512 Vector convert from sign integer to float/double
-//===----------------------------------------------------------------------===//
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
+ OpNode128, "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
-defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
- loadv8i64, i512mem, v16f32, v16i32,
- SSEPackedSingle>, EVEX_V512, PS,
- EVEX_CD8<32, CD8VF>;
+// Convert Signed/Unsigned Doubleword to Float
+multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
+ OpNodeRnd>, EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Doubleword
+multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Double to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+    // We need "x"/"y" suffixes in order to distinguish between the 128- and
+    // 256-bit memory forms of these instructions in the Asm Parser. They have
+    // the same dest type - 'v4i32x_info'. We also specify the broadcast string
+    // explicitly for the same reason.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
+ "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+// Convert Double to Signed/Unsigned Doubleword
+multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+    // We need "x"/"y" suffixes in order to distinguish between the 128- and
+    // 256-bit memory forms of these instructions in the Asm Parser. They have
+    // the same dest type - 'v4i32x_info'. We also specify the broadcast string
+    // explicitly for the same reason.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
+ "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+// Convert Double to Signed/Unsigned Quadword
+multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
+ EVEX_V256;
+ }
+}
-defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
- loadv4i64, i256mem, v8f64, v8i32,
- SSEPackedDouble>, EVEX_V512, XS,
+// Convert Double to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Signed/Unsigned Quadword to Double
+multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Quadword
+multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v4f32x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
+ "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v4f32x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
+ "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Signed/Unsigned Quadword to Float
+multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+    // We need "x"/"y" suffixes in order to distinguish between the 128- and
+    // 256-bit memory forms of these instructions in the Asm Parser. They have
+    // the same dest type - 'v4f32x_info'. We also specify the broadcast string
+    // explicitly for the same reason.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode,
+ "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>, XS,
EVEX_CD8<32, CD8VH>;
-defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
- loadv16f32, f512mem, v16i32, v16f32,
- SSEPackedSingle>, EVEX_V512, XS,
+defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
+ X86VSintToFpRnd>,
+ PS, EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
+ X86VFpToSintRnd>,
+ XS, EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint,
+ X86VFpToSintRnd>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
+ X86VFpToUintRnd>, PS,
EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
- loadv8f64, f512mem, v8i32, v8f64,
- SSEPackedDouble>, EVEX_V512, PD, VEX_W,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
+ X86VFpToUintRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
- loadv16f32, f512mem, v16i32, v16f32,
- SSEPackedSingle>, EVEX_V512, PS,
+defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
+ XS, EVEX_CD8<32, CD8VH>;
+
+defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
+ X86VUintToFpRnd>, XD,
EVEX_CD8<32, CD8VF>;
-// cvttps2udq (src, 0, mask-all-ones, sae-current)
-def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
- (VCVTTPS2UDQZrr VR512:$src)>;
+defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtps2Int,
+ X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
- loadv8f64, f512mem, v8i32, v8f64,
- SSEPackedDouble>, EVEX_V512, PS, VEX_W,
+defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtpd2Int,
+ X86cvtpd2IntRnd>, XD, VEX_W,
EVEX_CD8<64, CD8VF>;
-// cvttpd2udq (src, 0, mask-all-ones, sae-current)
-def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
- (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
- (VCVTTPD2UDQZrr VR512:$src)>;
+defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtps2UInt,
+ X86cvtps2UIntRnd>,
+ PS, EVEX_CD8<32, CD8VF>;
+defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtpd2UInt,
+ X86cvtpd2UIntRnd>, VEX_W,
+ PS, EVEX_CD8<64, CD8VF>;
-defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
- loadv4i64, f256mem, v8f64, v8i32,
- SSEPackedDouble>, EVEX_V512, XS,
- EVEX_CD8<32, CD8VH>;
+defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtpd2Int,
+ X86cvtpd2IntRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
-defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
- loadv16i32, f512mem, v16f32, v16i32,
- SSEPackedSingle>, EVEX_V512, XD,
- EVEX_CD8<32, CD8VF>;
+defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtps2Int,
+ X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtpd2UInt,
+ X86cvtpd2UIntRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
+
+defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtps2UInt,
+ X86cvtps2UIntRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
+ X86VFpToSlongRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint,
+ X86VFpToSlongRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
+ X86VFpToUlongRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint,
+ X86VFpToUlongRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
+ X86VSlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
+defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
+ X86VUlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
+
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
+ X86VSlongToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
+ X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
+
+let Predicates = [NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
@@ -4734,67 +5012,8 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
- (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
-def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VCVTDQ2PDZrr VR256X:$src)>;
-def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
- (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
-def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VCVTUDQ2PDZrr VR256X:$src)>;
-
-multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, PatFrag mem_frag,
- X86MemOperand x86memop, Domain d> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [], d>, EVEX;
- def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
- !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [], d>, EVEX, EVEX_B, EVEX_RC;
- let mayLoad = 1 in
- def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [], d>, EVEX;
-} // hasSideEffects = 0
}
-defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
- loadv16f32, f512mem, SSEPackedSingle>, PD,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
- loadv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
- EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
- (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
-
-def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
- (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
- (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
-
-defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
- loadv16f32, f512mem, SSEPackedSingle>,
- PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
- loadv8f64, f512mem, SSEPackedDouble>, VEX_W,
- PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
- (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
-
-def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
- (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
- (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
-
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 6ab961f04ecf..4cd5563ce727 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -105,14 +105,16 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in
// jecxz.
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", [], IIC_JCXZ>, AdSize16;
+ "jcxz\t$dst", [], IIC_JCXZ>, AdSize16,
+ Requires<[Not64BitMode]>;
let Uses = [ECX] in
def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jecxz\t$dst", [], IIC_JCXZ>, AdSize32;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64;
+ "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64,
+ Requires<[In64BitMode]>;
}
// Indirect branches
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index fe245c3a7e38..1f61ffa84e9a 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -39,11 +39,6 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
SDTCisVec<1>]>;
-def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
-def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
-def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
-def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
-
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@@ -75,6 +70,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
SDTCisVT<1, v4i32>]>>;
+def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",
+ SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
+ SDTCisVT<1, v4i32>]>>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -187,6 +185,7 @@ def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
+def X86mulhrs : SDNode<"X86ISD::MULHRS" , SDTIntBinOp>;
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
@@ -208,6 +207,14 @@ def X86pmuldq : SDNode<"X86ISD::PMULDQ",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<1,2>]>>;
+def X86extrqi : SDNode<"X86ISD::EXTRQI",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>;
+def X86insertqi : SDNode<"X86ISD::INSERTQI",
+ SDTypeProfile<1, 4, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>,
+ SDTCisVT<4, i8>]>>;
+
// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
// translated into one of the target nodes below during lowering.
// Note: this is a work in progress...
@@ -357,8 +364,70 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
-def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
-def X86SuintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
+def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
+
+def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
+def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
+
+def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCVecEltisVT<1, i32>,
+ SDTCisInt<2>]>;
+def SDTVlongToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCVecEltisVT<1, i64>,
+ SDTCisInt<2>]>;
+
+def SDTVFPToIntRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<1>, SDTCVecEltisVT<0, i32>,
+ SDTCisInt<2>]>;
+def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<1>, SDTCVecEltisVT<0, i64>,
+ SDTCisInt<2>]>;
+
+// Scalar
+def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
+def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+
+// Vector with rounding mode
+
+// cvtt fp-to-int stuff
+def X86VFpToSintRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToIntRound>;
+def X86VFpToUintRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToIntRound>;
+def X86VFpToSlongRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToLongRound>;
+def X86VFpToUlongRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToLongRound>;
+
+def X86VSintToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVintToFPRound>;
+def X86VUintToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVintToFPRound>;
+def X86VSlongToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVlongToFPRound>;
+def X86VUlongToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVlongToFPRound>;
+
+// cvt fp-to-int stuff
+def X86cvtps2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToIntRnd>;
+def X86cvtps2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToIntRnd>;
+def X86cvtpd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToIntRnd>;
+def X86cvtpd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToIntRnd>;
+
+// Vector without rounding mode
+def X86cvtps2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToInt>;
+def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>;
+def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>;
+def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>;
+
+def X86vfpextRnd : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisInt<2>]>>;
+def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisInt<2>]>>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b92ba99fb100..786150760b93 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -269,14 +269,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::XOR8rr, X86::XOR8mr, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2Addr); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable2Addr[i].RegOp;
- unsigned MemOp = MemoryFoldTable2Addr[i].MemOp;
- unsigned Flags = MemoryFoldTable2Addr[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) {
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 0, folded load and store, no alignment requirement.
- Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
+ Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
@@ -424,12 +421,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable0); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable0[i].RegOp;
- unsigned MemOp = MemoryFoldTable0[i].MemOp;
- unsigned Flags = MemoryFoldTable0[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
- RegOp, MemOp, TB_INDEX_0 | Flags);
+ Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags);
}
static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
@@ -862,14 +856,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable1); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable1[i].RegOp;
- unsigned MemOp = MemoryFoldTable1[i].MemOp;
- unsigned Flags = MemoryFoldTable1[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) {
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 1, folded load
- Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
@@ -1116,6 +1107,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
{ X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
{ X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
+ { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
{ X86::SBB32rr, X86::SBB32rm, 0 },
{ X86::SBB64rr, X86::SBB64rm, 0 },
{ X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
@@ -1412,6 +1405,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
{ X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
{ X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
+ { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
{ X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
{ X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
@@ -1733,14 +1728,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable2[i].RegOp;
- unsigned MemOp = MemoryFoldTable2[i].MemOp;
- unsigned Flags = MemoryFoldTable2[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 2, folded load
- Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
}
static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
@@ -1949,14 +1941,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable3); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable3[i].RegOp;
- unsigned MemOp = MemoryFoldTable3[i].MemOp;
- unsigned Flags = MemoryFoldTable3[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 3, folded load
- Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
@@ -2001,14 +1990,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable4); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable4[i].RegOp;
- unsigned MemOp = MemoryFoldTable4[i].MemOp;
- unsigned Flags = MemoryFoldTable4[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 4, folded load
- Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
}
}
@@ -3820,7 +3806,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
X86::MOVPQIto64rr);
if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
- return X86::MOVSDto64rr;
+ return X86::MMX_MOVD64from64rr;
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128XRegClass.contains(DestReg))
@@ -3828,7 +3814,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
X86::MOV64toPQIrr);
// Copy from a GR64 register to a VR64 register.
if (X86::VR64RegClass.contains(DestReg))
- return X86::MOV64toSDrr;
+ return X86::MMX_MOVD64to64rr;
}
// SrcReg(FR32) -> DestReg(GR32)
@@ -6413,22 +6399,40 @@ static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
return true;
-
+
return false;
}
+// TODO: There are many more machine instruction opcodes to match:
+// 1. Other data types (integer, vectors)
+// 2. Other math / logic operations (and, or)
+static bool isAssociativeAndCommutative(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::ADDSDrr:
+ case X86::ADDSSrr:
+ case X86::VADDSDrr:
+ case X86::VADDSSrr:
+ case X86::MULSDrr:
+ case X86::MULSSrr:
+ case X86::VMULSDrr:
+ case X86::VMULSSrr:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Return true if the input instruction is part of a chain of dependent ops
/// that are suitable for reassociation, otherwise return false.
/// If the instruction's operands must be commuted to have a previous
/// instruction of the same type define the first source operand, Commuted will
/// be set to true.
-static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode,
- bool &Commuted) {
- // 1. The instruction must have the correct type.
+static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
+ // 1. The operation must be associative and commutative.
// 2. The instruction must have virtual register definitions for its
// operands in the same basic block.
- // 3. The instruction must have a reassociatable sibling.
- if (Inst.getOpcode() == AssocOpcode &&
+ // 3. The instruction must have a reassociable sibling.
+ if (isAssociativeAndCommutative(Inst.getOpcode()) &&
hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
hasReassocSibling(Inst, Commuted))
return true;
@@ -6455,14 +6459,8 @@ bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
// B = A op X (Prev)
// C = B op Y (Root)
- // TODO: There are many more associative instruction types to match:
- // 1. Other forms of scalar FP add (non-AVX)
- // 2. Other data types (double, integer, vectors)
- // 3. Other math / logic operations (mul, and, or)
- unsigned AssocOpcode = X86::VADDSSrr;
-
- bool Commute = false;
- if (isReassocCandidate(Root, AssocOpcode, Commute)) {
+ bool Commute;
+ if (isReassocCandidate(Root, Commute)) {
// We found a sequence of instructions that may be suitable for a
// reassociation of operands to increase ILP. Specify each commutation
// possibility for the Prev instruction in the sequence and let the
@@ -6512,7 +6510,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
MachineOperand &OpC = Root.getOperand(0);
-
+
unsigned RegA = OpA.getReg();
unsigned RegB = OpB.getReg();
unsigned RegX = OpX.getReg();
@@ -6547,7 +6545,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
.addReg(RegX, getKillRegState(KillX))
.addReg(RegY, getKillRegState(KillY));
InsInstrs.push_back(MIB1);
-
+
MachineInstrBuilder MIB2 =
BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
.addReg(RegA, getKillRegState(KillA))
@@ -6579,7 +6577,7 @@ void X86InstrInfo::genAlternativeCodeSequence(
Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
}
assert(Prev && "Unknown pattern for machine combiner");
-
+
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
return;
}
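At the source level, the reassociation enabled above corresponds to the sketch below (hypothetical C++ functions; the pass itself rewrites MachineInstrs and, for FP ops, only fires when the fast-math flags permit reassociation). Breaking the serial chain of dependent adds into two independent adds shortens the critical path and exposes ILP.

// Serial chain: each add waits for the previous result (critical path = 3 adds).
float chained(float a, float x, float y, float z) {
  float t0 = a + x;
  float t1 = t0 + y;   // "Prev" in the pattern above
  float t2 = t1 + z;   // "Root" in the pattern above
  return t2;
}

// After reassociating Root, (y + z) is independent of (a + x), so the two adds
// can issue in parallel and the critical path drops to 2 dependent adds.
float reassociated(float a, float x, float y, float z) {
  return (a + x) + (y + z);
}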
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 6f38cb8eaf33..52bab9c79b45 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -194,7 +194,7 @@ def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
-def X86RecoverFrameAlloc : SDNode<"ISD::FRAME_ALLOC_RECOVER",
+def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
@@ -1028,14 +1028,13 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
- "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16,
- Requires<[Not64BitMode]>;
+ "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
+
def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
-def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16,
- Requires<[Not64BitMode]>;
def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
@@ -1081,9 +1080,6 @@ let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>, Requires<[In64BitMode]>;
-def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16,
- Requires<[In64BitMode]>;
def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2a896dfe8aa8..a5ff9edf05a3 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4035,13 +4035,13 @@ defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
SSE_INTALU_ITINS_P, 0>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
SSE_INTALU_ITINS_P, 0>;
-defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
+defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
-defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
+defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
-defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
+defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
-defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
+defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
// Intrinsic forms
@@ -6834,29 +6834,28 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasAVX, NoVLX] in {
- let isCommutable = 0 in
- defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
+ defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
+ defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
+ defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
+ defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
+ defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
+ defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
+ defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
+ defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
@@ -6865,29 +6864,28 @@ let Predicates = [HasAVX, NoVLX] in {
}
let Predicates = [HasAVX2, NoVLX] in {
- let isCommutable = 0 in
- defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
+ defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
+ defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
+ defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
+ defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
+ defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
+ defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
+ defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
+ defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
@@ -6896,22 +6894,21 @@ let Predicates = [HasAVX2, NoVLX] in {
}
let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in
- defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
+ defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
+ defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
+ defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
+ defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
+ defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
+ defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
+ defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
+ defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
VR128, memopv2i64, i128mem,
@@ -7773,7 +7770,7 @@ let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
(ins VR128:$src, u8imm:$len, u8imm:$idx),
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
- [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
+ [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
imm:$idx))]>, PD;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
@@ -7784,8 +7781,8 @@ def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
- [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
- VR128:$src2, imm:$len, imm:$idx))]>, XD;
+ [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
+ imm:$len, imm:$idx))]>, XD;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
"insertq\t{$mask, $src|$src, $mask}",
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 61a33484b8bf..2c8b95bcba22 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -19,7 +19,7 @@ namespace llvm {
enum IntrinsicType {
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
- INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
@@ -213,18 +213,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
@@ -596,60 +596,69 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_128, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_256, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_512, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulh_w_128, INTR_TYPE_2OP_MASK, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulh_w_256, INTR_TYPE_2OP_MASK, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulh_w_512, INTR_TYPE_2OP_MASK, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_128, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_256, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_512, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
@@ -1008,10 +1017,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
@@ -1049,14 +1058,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse41_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
@@ -1070,6 +1079,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
+ X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
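The table changes above retarget the integer min/max intrinsics from X86-specific nodes to the generic ISD::SMAX/SMIN/UMAX/UMIN opcodes. For reference, the per-lane semantics those nodes encode differ only in signed versus unsigned comparison (hypothetical helper names below; the real nodes operate on whole vectors):

#include <cstdint>

int32_t  smax_lane(int32_t a, int32_t b)   { return a > b ? a : b; } // signed compare
int32_t  smin_lane(int32_t a, int32_t b)   { return a < b ? a : b; } // signed compare
uint32_t umax_lane(uint32_t a, uint32_t b) { return a > b ? a : b; } // unsigned compare
uint32_t umin_lane(uint32_t a, uint32_t b) { return a < b ? a : b; } // unsigned compare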
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index d598b55aae3e..e6db9708b677 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -30,59 +30,67 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// pointer for reasons other than it containing dynamic allocation or
/// that FP eliminatation is turned off. For example, Cygwin main function
/// contains stack pointer re-alignment code which requires FP.
- bool ForceFramePointer;
+ bool ForceFramePointer = false;
/// RestoreBasePointerOffset - Non-zero if the function has base pointer
/// and makes call to llvm.eh.sjlj.setjmp. When non-zero, the value is a
/// displacement from the frame pointer to a slot where the base pointer
/// is stashed.
- signed char RestoreBasePointerOffset;
+ signed char RestoreBasePointerOffset = 0;
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
- unsigned CalleeSavedFrameSize;
+ unsigned CalleeSavedFrameSize = 0;
/// BytesToPopOnReturn - Number of bytes function pops on return (in addition
/// to the space used by the return address).
/// Used on windows platform for stdcall & fastcall name decoration
- unsigned BytesToPopOnReturn;
+ unsigned BytesToPopOnReturn = 0;
/// ReturnAddrIndex - FrameIndex for return slot.
- int ReturnAddrIndex;
+ int ReturnAddrIndex = 0;
/// \brief FrameIndex for return slot.
- int FrameAddrIndex;
+ int FrameAddrIndex = 0;
/// TailCallReturnAddrDelta - The number of bytes by which return address
/// stack slot is moved as the result of tail call optimization.
- int TailCallReturnAddrDelta;
+ int TailCallReturnAddrDelta = 0;
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
+ unsigned SRetReturnReg = 0;
/// GlobalBaseReg - keeps track of the virtual register initialized for
/// use as the global base register. This is used for PIC in some PIC
/// relocation models.
- unsigned GlobalBaseReg;
+ unsigned GlobalBaseReg = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// RegSaveFrameIndex - X86-64 vararg func register save area.
- int RegSaveFrameIndex;
+ int RegSaveFrameIndex = 0;
/// VarArgsGPOffset - X86-64 vararg func int reg offset.
- unsigned VarArgsGPOffset;
+ unsigned VarArgsGPOffset = 0;
/// VarArgsFPOffset - X86-64 vararg func fp reg offset.
- unsigned VarArgsFPOffset;
+ unsigned VarArgsFPOffset = 0;
/// ArgumentStackSize - The number of bytes on stack consumed by the arguments
/// being passed on the stack.
- unsigned ArgumentStackSize;
+ unsigned ArgumentStackSize = 0;
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
- unsigned NumLocalDynamics;
+ unsigned NumLocalDynamics = 0;
/// HasPushSequences - Keeps track of whether this function uses sequences
/// of pushes to pass function parameters.
- bool HasPushSequences;
+ bool HasPushSequences = false;
+
+ /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill
+ /// slot for the frame pointer.
+ bool HasSEHFramePtrSave = false;
+
+ /// The frame index of a stack object containing the original frame pointer
+ /// used to address arguments in a function using a base pointer.
+ int SEHFramePtrSaveIndex = 0;
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
@@ -90,40 +98,9 @@ private:
SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
public:
- X86MachineFunctionInfo() : ForceFramePointer(false),
- RestoreBasePointerOffset(0),
- CalleeSavedFrameSize(0),
- BytesToPopOnReturn(0),
- ReturnAddrIndex(0),
- FrameAddrIndex(0),
- TailCallReturnAddrDelta(0),
- SRetReturnReg(0),
- GlobalBaseReg(0),
- VarArgsFrameIndex(0),
- RegSaveFrameIndex(0),
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
- NumLocalDynamics(0),
- HasPushSequences(false) {}
-
- explicit X86MachineFunctionInfo(MachineFunction &MF)
- : ForceFramePointer(false),
- RestoreBasePointerOffset(0),
- CalleeSavedFrameSize(0),
- BytesToPopOnReturn(0),
- ReturnAddrIndex(0),
- FrameAddrIndex(0),
- TailCallReturnAddrDelta(0),
- SRetReturnReg(0),
- GlobalBaseReg(0),
- VarArgsFrameIndex(0),
- RegSaveFrameIndex(0),
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
- NumLocalDynamics(0),
- HasPushSequences(false) {}
+ X86MachineFunctionInfo() = default;
+
+ explicit X86MachineFunctionInfo(MachineFunction &MF) {};
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -174,6 +151,12 @@ public:
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+ bool getHasSEHFramePtrSave() const { return HasSEHFramePtrSave; }
+ void setHasSEHFramePtrSave(bool V) { HasSEHFramePtrSave = V; }
+
+ int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; }
+ void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; }
+
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
return ForwardedMustTailRegParms;
}
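The deleted constructor boilerplate is replaced by C++11 in-class default member initializers; the same pattern in miniature (hypothetical struct, not part of the change):

// Members take their declared defaults; constructors no longer need to list them.
struct Counters {
  unsigned Loads = 0;
  unsigned Stores = 0;
  bool Dirty = false;

  Counters() = default;
  explicit Counters(unsigned Preload) : Loads(Preload) {} // other members still default
};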
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 0033b5058187..d8495e53e0e3 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -202,7 +202,7 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
@@ -343,7 +343,7 @@ X86RegisterInfo::getNoPreservedMask() const {
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
// Set the stack-pointer register and its aliases as reserved.
for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
@@ -452,7 +452,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// use both the SP and the FP, we need a separate base pointer register.
bool CantUseFP = needsStackRealignment(MF);
bool CantUseSP =
- MFI->hasVarSizedObjects() || MFI->hasInlineAsmWithSPAdjust();
+ MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
return CantUseFP && CantUseSP;
}
@@ -477,9 +477,9 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
const Function *F = MF.getFunction();
- unsigned StackAlign =
- MF.getSubtarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
@@ -503,7 +503,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr;
@@ -519,18 +519,17 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
- // FRAME_ALLOC uses a single offset, with no register. It only works in the
+ // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
// simple FP case, and doesn't work with stack realignment. On 32-bit, the
// offset is from the traditional base pointer location. On 64-bit, the
// offset is from the SP at the end of the prologue, not the FP location. This
// matches the behavior of llvm.frameaddress.
- if (Opc == TargetOpcode::FRAME_ALLOC) {
+ if (Opc == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int Offset;
if (IsWinEH)
- Offset = static_cast<const X86FrameLowering *>(TFI)
- ->getFrameIndexOffsetFromSP(MF, FrameIndex);
+ Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex);
else
Offset = TFI->getFrameIndexOffset(MF, FrameIndex);
FI.ChangeToImmediate(Offset);
@@ -584,7 +583,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 5ca40bc0091b..ce79fcf9ad81 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -24,11 +24,6 @@ using namespace llvm;
#define DEBUG_TYPE "x86-selectiondag-info"
-X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-X86SelectionDAGInfo::~X86SelectionDAGInfo() {}
-
bool X86SelectionDAGInfo::isBaseRegConflictPossible(
SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const {
// We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -37,7 +32,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
// dynamic stack adjustments (hopefully rare) and the base pointer would
// conflict if we had to use it.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust())
+ if (!MFI->hasVarSizedObjects() && !MFI->hasOpaqueSPAdjustment())
return false;
const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
@@ -81,8 +76,9 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
- EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy();
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ EVT IntPtr =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index eb7e0ed9de6c..961bd8c8d5ef 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -29,8 +29,7 @@ class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
ArrayRef<unsigned> ClobberSet) const;
public:
- explicit X86SelectionDAGInfo(const DataLayout &DL);
- ~X86SelectionDAGInfo();
+ explicit X86SelectionDAGInfo() = default;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 3b25d30dc221..dff3624b7efe 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -68,7 +68,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
- bool isDecl = GV->isDeclarationForLinker();
+ bool isDef = GV->isStrongDefinitionForLinker();
// X86-64 in PIC mode.
if (isPICStyleRIPRel()) {
@@ -80,8 +80,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If symbol visibility is hidden, the extra load is not needed if
// target is x86-64 or the symbol is definitely defined in the current
// translation unit.
- if (GV->hasDefaultVisibility() &&
- (isDecl || GV->isWeakForLinker()))
+ if (GV->hasDefaultVisibility() && !isDef)
return X86II::MO_GOTPCREL;
} else if (!isTargetWin64()) {
assert(isTargetELF() && "Unknown rip-relative target");
@@ -107,7 +106,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If this is a strong reference to a definition, it is definitely not
// through a stub.
- if (!isDecl && !GV->isWeakForLinker())
+ if (isDef)
return X86II::MO_PIC_BASE_OFFSET;
// Unless we have a symbol with hidden visibility, we have to go through a
@@ -117,7 +116,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If symbol visibility is hidden, we have a stub for common symbol
// references and external declarations.
- if (isDecl || GV->hasCommonLinkage()) {
+ if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {
// Hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
}
@@ -131,7 +130,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If this is a strong reference to a definition, it is definitely not
// through a stub.
- if (!isDecl && !GV->isWeakForLinker())
+ if (isDef)
return X86II::MO_NO_FLAG;
// Unless we have a symbol with hidden visibility, we have to go through a
@@ -193,12 +192,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit,+sse2";
}
- // If feature string is not empty, parse features string.
+ // Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
- // Make sure the right MCSchedModel is used.
- InitCPUSchedModel(CPUName);
-
InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
@@ -298,9 +294,8 @@ X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- TSInfo(*TM.getDataLayout()),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this, getStackAlignment()) {
+ TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (TM.getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d420abbe1433..f026d4295f71 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -447,8 +447,26 @@ public:
}
bool isCallingConvWin64(CallingConv::ID CC) const {
- return (isTargetWin64() && CC != CallingConv::X86_64_SysV) ||
- CC == CallingConv::X86_64_Win64;
+ switch (CC) {
+ // On Win64, all these conventions just use the default convention.
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::X86_VectorCall:
+ case CallingConv::Intel_OCL_BI:
+ return isTargetWin64();
+ // This convention allows using the Win64 convention on other targets.
+ case CallingConv::X86_64_Win64:
+ return true;
+ // This convention allows using the SysV convention on Windows targets.
+ case CallingConv::X86_64_SysV:
+ return false;
+ // Otherwise, who knows what this is.
+ default:
+ return false;
+ }
}
/// ClassifyGlobalReference - Classify a global variable reference for the
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 0c82a700952b..7df726091843 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -89,7 +89,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -117,6 +117,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry<MVT::SimpleValueType>
AVX2UniformConstCostTable[] = {
+ { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
+
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
{ ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
{ ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
@@ -211,6 +213,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
@@ -261,12 +264,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
// It is not a good idea to vectorize division. We have to scalarize it and
@@ -352,7 +355,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
unsigned Cost = 1;
if (LT.second.getSizeInBits() > 128)
Cost = 3; // Extract + insert + copy.
@@ -364,7 +367,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
@@ -464,8 +467,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
+ std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
SSE2ConvTbl[] = {
@@ -537,8 +540,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
if (Idx != -1)
return AVX512ConversionTbl[Idx].Cost;
}
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
// The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
@@ -667,7 +670,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@@ -740,7 +743,7 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -803,7 +806,7 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
@@ -850,9 +853,9 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
unsigned Cost = 0;
- if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
+ if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
@@ -887,7 +890,7 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@@ -1117,11 +1120,11 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
int DataWidth = DataTy->getPrimitiveSizeInBits();
-
+
// Todo: AVX512 allows gather/scatter, works with strided and random as well
if ((DataWidth < 32) || (Consecutive == 0))
return false;
- if (ST->hasAVX512() || ST->hasAVX2())
+ if (ST->hasAVX512() || ST->hasAVX2())
return true;
return false;
}
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index a83158440193..da3f36c2e27e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -40,7 +40,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
public:
explicit X86TTIImpl(const X86TargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
X86TTIImpl(const X86TTIImpl &Arg)
@@ -48,18 +49,6 @@ public:
X86TTIImpl(X86TTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- X86TTIImpl &operator=(const X86TTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- X86TTIImpl &operator=(X86TTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 90357257b9ef..9190d0be9e4d 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -113,8 +113,8 @@ char WinEHStatePass::ID = 0;
bool WinEHStatePass::doInitialization(Module &M) {
TheModule = &M;
- FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape);
- FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover);
+ FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape);
+ FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::localrecover);
FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress);
return false;
}
@@ -133,7 +133,7 @@ bool WinEHStatePass::doFinalization(Module &M) {
void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
// This pass should only insert a stack allocation, memory accesses, and
- // framerecovers.
+ // localrecovers.
AU.setPreservesCFG();
}
@@ -336,9 +336,11 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
FunctionType *TargetFuncTy =
FunctionType::get(Int32Ty, makeArrayRef(&ArgTys[0], 5),
/*isVarArg=*/false);
- Function *Trampoline = Function::Create(
- TrampolineTy, GlobalValue::InternalLinkage,
- Twine("__ehhandler$") + ParentFunc->getName(), TheModule);
+ Function *Trampoline =
+ Function::Create(TrampolineTy, GlobalValue::InternalLinkage,
+ Twine("__ehhandler$") + GlobalValue::getRealLinkageName(
+ ParentFunc->getName()),
+ TheModule);
BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline);
IRBuilder<> Builder(EntryBB);
Value *LSDA = emitEHLSDA(Builder, ParentFunc);
@@ -419,14 +421,14 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
}
/// Escape RegNode so that we can access it from child handlers. Find the call
-/// to frameescape, if any, in the entry block and append RegNode to the list
+/// to localescape, if any, in the entry block and append RegNode to the list
/// of arguments.
int WinEHStatePass::escapeRegNode(Function &F) {
- // Find the call to frameescape and extract its arguments.
+ // Find the call to localescape and extract its arguments.
IntrinsicInst *EscapeCall = nullptr;
for (Instruction &I : F.getEntryBlock()) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::frameescape) {
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
EscapeCall = II;
break;
}
@@ -440,8 +442,10 @@ int WinEHStatePass::escapeRegNode(Function &F) {
// Replace the call (if it exists) with new one. Otherwise, insert at the end
// of the entry block.
- IRBuilder<> Builder(&F.getEntryBlock(),
- EscapeCall ? EscapeCall : F.getEntryBlock().end());
+ Instruction *InsertPt = EscapeCall;
+ if (!EscapeCall)
+ InsertPt = F.getEntryBlock().getTerminator();
+ IRBuilder<> Builder(&F.getEntryBlock(), InsertPt);
Builder.CreateCall(FrameEscape, Args);
if (EscapeCall)
EscapeCall->eraseFromParent();
@@ -520,6 +524,11 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
for (auto &Handler : ActionList) {
if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) {
auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc());
+#ifndef NDEBUG
+ for (BasicBlock *Pred : predecessors(BA->getBasicBlock()))
+ assert(Pred->isLandingPad() &&
+ "WinEHPrepare failed to split block");
+#endif
ExceptBlocks.insert(BA->getBasicBlock());
}
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index ac954d0a8fa4..b4085835f285 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -40,7 +40,7 @@ static MCInstrInfo *createXCoreMCInstrInfo() {
return X;
}
-static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) {
+static MCRegisterInfo *createXCoreMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitXCoreMCRegisterInfo(X, XCore::LR);
return X;
@@ -48,9 +48,7 @@ static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *
createXCoreMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitXCoreMCSubtargetInfo(X, TT, CPU, FS);
- return X;
+ return createXCoreMCSubtargetInfoImpl(TT, CPU, FS);
}
static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI,
@@ -64,7 +62,8 @@ static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createXCoreMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index bd834cc5be4b..76c3d8130e75 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -525,12 +525,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-void XCoreFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+void XCoreFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
- bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool LRUsed = MRI.isPhysRegModified(XCore::LR);
if (!LRUsed && !MF.getFunction()->isVarArg() &&
MF.getFrameInfo()->estimateStackSize(MF))
@@ -550,7 +553,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (LRUsed) {
// We will handle the LR in the prologue/epilogue
// and allocate space on the stack ourselves.
- MF.getRegInfo().setPhysRegUnused(XCore::LR);
+ SavedRegs.reset(XCore::LR);
XFI->createLRSpillSlot(MF);
}
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index 607c77248952..69c71adc8d3f 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -47,8 +47,8 @@ namespace llvm {
bool hasFP(const MachineFunction &MF) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = nullptr) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index f5b180b1ac0d..9d4a966dfba4 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -144,10 +144,9 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
MVT::i32, MskSize);
}
else if (!isUInt<16>(Val)) {
- SDValue CPIdx =
- CurDAG->getTargetConstantPool(ConstantInt::get(
- Type::getInt32Ty(*CurDAG->getContext()), Val),
- getTargetLowering()->getPointerTy());
+ SDValue CPIdx = CurDAG->getTargetConstantPool(
+ ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
+ getTargetLowering()->getPointerTy(CurDAG->getDataLayout()));
SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
MVT::Other, CPIdx,
CurDAG->getEntryNode());
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index aa71241102ff..d62e7428299d 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -281,7 +281,8 @@ static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL)
if (!ObjType->isSized())
return false;
- unsigned ObjSize = XTL.getDataLayout()->getTypeAllocSize(ObjType);
+ auto &DL = GV->getParent()->getDataLayout();
+ unsigned ObjSize = DL.getTypeAllocSize(ObjType);
return ObjSize < CodeModelLargeSize && ObjSize != 0;
}
@@ -312,8 +313,9 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
Constant *GAI = ConstantExpr::getGetElementPtr(
Type::getInt8Ty(*DAG.getContext()), GA, Idx);
SDValue CP = DAG.getConstantPool(GAI, MVT::i32);
- return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP,
- MachinePointerInfo(), false, false, false, 0);
+ return DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL,
+ DAG.getEntryNode(), CP, MachinePointerInfo(), false,
+ false, false, 0);
}
}
@@ -321,11 +323,11 @@ SDValue XCoreTargetLowering::
LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
{
SDLoc DL(Op);
-
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy());
+ SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT);
- return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result);
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, PtrVT, Result);
}
SDValue XCoreTargetLowering::
@@ -378,9 +380,10 @@ SDValue XCoreTargetLowering::
lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base,
int64_t Offset, SelectionDAG &DAG) const
{
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if ((Offset & 0x3) == 0) {
- return DAG.getLoad(getPointerTy(), DL, Chain, Base, MachinePointerInfo(),
- false, false, false, 0);
+ return DAG.getLoad(PtrVT, DL, Chain, Base, MachinePointerInfo(), false,
+ false, false, 0);
}
// Lower to pair of consecutive word aligned loads plus some bit shifting.
int32_t HighOffset = RoundUpToAlignment(Offset, 4);
@@ -401,11 +404,9 @@ lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base,
SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, DL, MVT::i32);
SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, DL, MVT::i32);
- SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
- LowAddr, MachinePointerInfo(),
+ SDValue Low = DAG.getLoad(PtrVT, DL, Chain, LowAddr, MachinePointerInfo(),
false, false, false, 0);
- SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
- HighAddr, MachinePointerInfo(),
+ SDValue High = DAG.getLoad(PtrVT, DL, Chain, HighAddr, MachinePointerInfo(),
false, false, false, 0);
SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
@@ -435,8 +436,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LD->getAlignment()))
return SDValue();
- unsigned ABIAlignment = getDataLayout()->
- getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ auto &TD = DAG.getDataLayout();
+ unsigned ABIAlignment = TD.getABITypeAlignment(
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
// Leave aligned load alone.
if (LD->getAlignment() >= ABIAlignment)
return SDValue();
@@ -486,7 +488,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
// Lower to a call to __misaligned_load(BasePtr).
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = TD.getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -495,10 +497,11 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL).setChain(Chain)
- .setCallee(CallingConv::C, IntPtrTy,
- DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- std::move(Args), 0);
+ CLI.setDebugLoc(DL).setChain(Chain).setCallee(
+ CallingConv::C, IntPtrTy,
+ DAG.getExternalSymbol("__misaligned_load",
+ getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ops[] = { CallResult.first, CallResult.second };
@@ -516,8 +519,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
ST->getAlignment())) {
return SDValue();
}
- unsigned ABIAlignment = getDataLayout()->
- getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
+ ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
// Leave aligned store alone.
if (ST->getAlignment() >= ABIAlignment) {
return SDValue();
@@ -545,7 +548,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
}
// Lower to a call to __misaligned_store(BasePtr, Value).
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -557,10 +560,11 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
- std::move(Args), 0);
+ CLI.setDebugLoc(dl).setChain(Chain).setCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__misaligned_store",
+ getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -833,9 +837,9 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
int FI = XFI->createLRSpillSlot(MF);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- return DAG.getLoad(getPointerTy(), SDLoc(Op), DAG.getEntryNode(), FIN,
- MachinePointerInfo::getFixedStack(FI), false, false,
- false, 0);
+ return DAG.getLoad(
+ getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN,
+ MachinePointerInfo::getFixedStack(FI), false, false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -979,11 +983,10 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
if (N->getMemoryVT() == MVT::i32) {
if (N->getAlignment() < 4)
report_fatal_error("atomic load must be aligned");
- return DAG.getLoad(getPointerTy(), SDLoc(Op), N->getChain(),
- N->getBasePtr(), N->getPointerInfo(),
- N->isVolatile(), N->isNonTemporal(),
- N->isInvariant(), N->getAlignment(),
- N->getAAInfo(), N->getRanges());
+ return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op),
+ N->getChain(), N->getBasePtr(), N->getPointerInfo(),
+ N->isVolatile(), N->isNonTemporal(), N->isInvariant(),
+ N->getAlignment(), N->getAAInfo(), N->getRanges());
}
if (N->getMemoryVT() == MVT::i16) {
if (N->getAlignment() < 2)
@@ -1150,9 +1153,10 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = RetCCInfo.getNextStackOffset();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, dl,
- getPointerTy(), true), dl);
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, dl, PtrVT, true), dl);
SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
SmallVector<SDValue, 12> MemOpChains;
@@ -1239,11 +1243,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, dl, getPointerTy(),
- true),
- DAG.getConstant(0, dl, getPointerTy(), true),
- InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, dl, PtrVT, true),
+ DAG.getConstant(0, dl, PtrVT, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
@@ -1830,7 +1831,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if (StoreBits % 8) {
break;
}
- unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
unsigned Alignment = ST->getAlignment();
if (Alignment >= ABIAlignment) {
@@ -1924,15 +1925,13 @@ static inline bool isImmUs4(int64_t val)
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool
-XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
- unsigned AS) const {
+bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
- const DataLayout *TD = TM.getDataLayout();
- unsigned Size = TD->getTypeAllocSize(Ty);
+ unsigned Size = DL.getTypeAllocSize(Ty);
if (AM.BaseGV) {
return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
AM.BaseOffs%4 == 0;
@@ -1970,7 +1969,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
std::pair<unsigned, const TargetRegisterClass *>
XCoreTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 97f0494b6fe3..ddd675c5164d 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -101,7 +101,9 @@ namespace llvm {
unsigned getJumpTableEncoding() const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+ MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override {
+ return MVT::i32;
+ }
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -120,8 +122,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
private:
const TargetMachine &TM;
@@ -175,8 +177,7 @@ namespace llvm {
// Inline asm support
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
// Expand specifics
SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
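The thread running through all of these XCore hunks is the same: TargetLowering
queries that used to reach through TargetMachine::getDataLayout() now take the
DataLayout explicitly, normally fetched from the SelectionDAG. A minimal sketch
of the resulting calling convention, assuming only the interfaces used above
(illustrative, not taken from the patch):

    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"
    #include "llvm/Target/TargetLowering.h"
    using namespace llvm;

    // Pointer type and ABI alignment now come from an explicit DataLayout
    // rather than from TargetMachine::getDataLayout().
    static MVT pointerTypeFor(const TargetLowering &TLI, SelectionDAG &DAG) {
      return TLI.getPointerTy(DAG.getDataLayout());
    }

    static unsigned abiAlignFor(SelectionDAG &DAG, Type *Ty) {
      return DAG.getDataLayout().getABITypeAlignment(Ty);
    }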
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1d569e8936df..1cfb57dc3af3 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -222,7 +222,7 @@ XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
XCore::R8, XCore::R9,
0
};
- const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ const XCoreFrameLowering *TFI = getFrameLowering(*MF);
if (TFI->hasFP(*MF))
return CalleeSavedRegsFP;
return CalleeSavedRegs;
@@ -230,7 +230,7 @@ XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const XCoreFrameLowering *TFI = getFrameLowering(MF);
Reserved.set(XCore::CP);
Reserved.set(XCore::DP);
@@ -270,7 +270,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const XCoreFrameLowering *TFI = getFrameLowering(MF);
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
int StackSize = MF.getFrameInfo()->getStackSize();
@@ -324,7 +324,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const XCoreFrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
}
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index a34884480cea..40568d124de0 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -16,12 +16,6 @@ using namespace llvm;
#define DEBUG_TYPE "xcore-selectiondag-info"
-XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
-}
-
SDValue XCoreSelectionDAGInfo::
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
@@ -36,18 +30,20 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()),
- std::move(Args), 0)
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__memcpy_4",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index cfd80b3f3172..77b3527d77e3 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -22,8 +22,6 @@ class XCoreTargetMachine;
class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit XCoreSelectionDAGInfo(const DataLayout &DL);
- ~XCoreSelectionDAGInfo();
SDValue
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index c98518b60225..99ad2c88504f 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -28,4 +28,4 @@ void XCoreSubtarget::anchor() { }
XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: XCoreGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this),
- TLInfo(TM, *this), TSInfo(*TM.getDataLayout()) {}
+ TLInfo(TM, *this), TSInfo() {}
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 370b64b26688..f420081868f9 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -85,6 +85,7 @@ extern "C" void LLVMInitializeXCoreTarget() {
}
TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &) { return TargetTransformInfo(XCoreTTIImpl(this)); });
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(XCoreTTIImpl(this, F));
+ });
}
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h
index 70b47dfa1156..e23aef3e3b4a 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -37,8 +37,9 @@ class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> {
const XCoreTargetLowering *getTLI() const { return TLI; }
public:
- explicit XCoreTTIImpl(const XCoreTargetMachine *TM)
- : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+ explicit XCoreTTIImpl(const XCoreTargetMachine *TM, Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
XCoreTTIImpl(const XCoreTTIImpl &Arg)
@@ -46,18 +47,6 @@ public:
XCoreTTIImpl(XCoreTTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- XCoreTTIImpl &operator=(const XCoreTTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- XCoreTTIImpl &operator=(XCoreTTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
unsigned getNumberOfRegisters(bool Vector) {
if (Vector) {
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index f75436328252..4762011d63d8 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -825,7 +825,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
V = GetElementPtrInst::Create(SI->first, V, Ops,
V->getName() + ".idx", Call);
Ops.clear();
- AA.copyValue(OrigLoad->getOperand(0), V);
}
// Since we're replacing a load make sure we take the alignment
// of the previous load.
@@ -837,7 +836,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
newLoad->setAAMetadata(AAInfo);
Args.push_back(newLoad);
- AA.copyValue(OrigLoad, Args.back());
}
}
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 3df17b920a95..336dac45e13a 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMipo
BarrierNoopPass.cpp
ConstantMerge.cpp
DeadArgumentElimination.cpp
+ ElimAvailExtern.cpp
ExtractGV.cpp
FunctionAttrs.cpp
GlobalDCE.cpp
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 76898f275058..d0447640259e 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -326,7 +326,18 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
/// instead.
bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
{
- if (Fn.isDeclaration() || Fn.mayBeOverridden())
+ // We cannot change the arguments if this TU does not define the function or
+ // if the linker may choose a function body from another TU, even if the
+ // nominal linkage indicates that other copies of the function have the same
+ // semantics. In the below example, the dead load from %p may not have been
+ // eliminated from the linker-chosen copy of f, so replacing %p with undef
+ // in callers may introduce undefined behavior.
+ //
+ // define linkonce_odr void @f(i32* %p) {
+ // %v = load i32 %p
+ // ret void
+ // }
+ if (!Fn.isStrongDefinitionForLinker())
return false;
// Functions with local linkage should already have been handled, except the
@@ -334,19 +345,6 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
return false;
- // If a function seen at compile time is not necessarily the one linked to
- // the binary being built, it is illegal to change the actual arguments
- // passed to it. These functions can be captured by isWeakForLinker().
- // *NOTE* that mayBeOverridden() is insufficient for this purpose as it
- // doesn't include linkage types like AvailableExternallyLinkage and
- // LinkOnceODRLinkage. Take link_odr* as an example, it indicates a set of
- // *EQUIVALENT* globals that can be merged at link-time. However, the
- // semantic of *EQUIVALENT*-functions includes parameters. Changing
- // parameters breaks this assumption.
- //
- if (Fn.isWeakForLinker())
- return false;
-
if (Fn.use_empty())
return false;
diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp
new file mode 100644
index 000000000000..67ba72d6a360
--- /dev/null
+++ b/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -0,0 +1,84 @@
+//===-- ElimAvailExtern.cpp - Eliminate Available Externally Globals -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate available external global
+// definitions from the program, turning them into declarations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "elim-avail-extern"
+
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+ struct EliminateAvailableExternally : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ EliminateAvailableExternally() : ModulePass(ID) {
+ initializeEliminateAvailableExternallyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+    // run - Run the EliminateAvailableExternally pass on the specified
+    // module, turning available_externally definitions into declarations.
+    //
+ bool runOnModule(Module &M) override;
+ };
+}
+
+char EliminateAvailableExternally::ID = 0;
+INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern",
+ "Eliminate Available Externally Globals", false, false)
+
+ModulePass *llvm::createEliminateAvailableExternallyPass() {
+ return new EliminateAvailableExternally();
+}
+
+bool EliminateAvailableExternally::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Drop initializers of available externally global variables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (!I->hasAvailableExternallyLinkage())
+ continue;
+ if (I->hasInitializer()) {
+ Constant *Init = I->getInitializer();
+ I->setInitializer(nullptr);
+ if (isSafeToDestroyConstant(Init))
+ Init->destroyConstant();
+ }
+ I->removeDeadConstantUsers();
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ NumVariables++;
+ }
+
+ // Drop the bodies of available externally functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->hasAvailableExternallyLinkage())
+ continue;
+ if (!I->isDeclaration())
+ // This will set the linkage to external
+ I->deleteBody();
+ I->removeDeadConstantUsers();
+ NumFunctions++;
+ }
+
+ return Changed;
+}
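Taken on its own, the new pass is an ordinary ModulePass, and the factory
function above is its only public entry point (its declaration is assumed to
live in llvm/Transforms/IPO.h, which the file includes). A rough usage sketch,
illustrative rather than part of the patch:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    // Run elim-avail-extern over a module: available_externally function
    // bodies are deleted and such globals lose their initializers, leaving
    // plain external declarations behind.
    static void stripAvailExtern(Module &M) {
      legacy::PassManager PM;
      PM.add(createEliminateAvailableExternallyPass());
      PM.run(M);
    }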
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 2f8c7d9349b9..b9462f2ffc72 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -93,8 +93,11 @@ namespace {
makeVisible(*I, Delete);
- if (Delete)
+ if (Delete) {
+      // Make this a declaration and drop its comdat.
I->setInitializer(nullptr);
+ I->setComdat(nullptr);
+ }
}
// Visit the Functions.
@@ -108,8 +111,11 @@ namespace {
makeVisible(*I, Delete);
- if (Delete)
+ if (Delete) {
+      // Make this a declaration and drop its comdat.
I->deleteBody();
+ I->setComdat(nullptr);
+ }
}
// Visit the Aliases.
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index fcacec3286fa..50f56b0f2afe 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -46,6 +46,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeStripDeadDebugInfoPass(Registry);
initializeStripNonDebugSymbolsPass(Registry);
initializeBarrierNoopPass(Registry);
+ initializeEliminateAvailableExternallyPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 963f1bb13aaf..88e5e479136f 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -105,6 +105,7 @@ PassManagerBuilder::PassManagerBuilder() {
VerifyInput = false;
VerifyOutput = false;
MergeFunctions = false;
+ PrepareForLTO = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -319,8 +320,8 @@ void PassManagerBuilder::populateModulePassManager(
  // Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
- // on the rotated form.
- MPM.add(createLoopRotatePass());
+ // on the rotated form. Disable header duplication at -Oz.
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization.
@@ -401,6 +402,17 @@ void PassManagerBuilder::populateModulePassManager(
// GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
if (OptLevel > 1) {
+ if (!PrepareForLTO) {
+      // Remove available externally function and global definitions if we
+      // aren't compiling an object file for later LTO. For LTO we want to
+      // preserve these so they are eligible for inlining at link-time. Note
+      // that if they are unreferenced they will be removed by GlobalDCE
+      // below, so this only impacts referenced available externally globals.
+      // Eventually they will be suppressed during codegen, but eliminating
+      // them here enables more opportunity for GlobalDCE, as it may make
+      // globals referenced by available externally functions dead.
+ MPM.add(createEliminateAvailableExternallyPass());
+ }
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
MPM.add(createConstantMergePass()); // Merge dup global constants
}
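For a frontend driving PassManagerBuilder, the practical knob is the new
PrepareForLTO flag: when emitting bitcode that will later go through LTO it
should be set so available_externally bodies survive for link-time inlining.
A hedged configuration sketch (the field names come from the hunks above;
the surrounding driver code is illustrative):

    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    // Keep available_externally definitions when producing LTO input;
    // elim-avail-extern then stays out of the -O2 module pipeline.
    static void configureForLTOInput(PassManagerBuilder &PMB) {
      PMB.OptLevel = 2;
      PMB.SizeLevel = 0;
      PMB.PrepareForLTO = true;
    }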
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 010b7b57c3e7..0bd6fd2f226d 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3928,8 +3928,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V =
- SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I))
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1,
+ I.getFastMathFlags(), DL, TLI, DT, AC, &I))
return ReplaceInstUsesWith(I, V);
// Simplify 'fcmp pred X, X'
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 97ea8df757f8..ac934f1bd85c 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetFolder.h"
@@ -177,6 +178,8 @@ private:
// Mode in which we are running the combiner.
const bool MinimizeSize;
+ AliasAnalysis *AA;
+
// Required analyses.
// FIXME: These can never be null and should be references.
AssumptionCache *AC;
@@ -192,10 +195,11 @@ private:
public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
- bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI,
+ bool MinimizeSize, AliasAnalysis *AA,
+ AssumptionCache *AC, TargetLibraryInfo *TLI,
DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
- AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
+ AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
/// \brief Run the combiner over the entire worklist until it is empty.
///
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e7a45330d955..e3179dbeece8 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -749,10 +749,25 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
BasicBlock::iterator BBI = &LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
+ AAMDNodes AATags;
+ if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
+ 6, AA, &AATags)) {
+ if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull,
+ };
+ combineMetadata(NLI, &LI, KnownIDs);
+    }
+
return ReplaceInstUsesWith(
LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
LI.getName() + ".cast"));
+ }
// load(gep null, ...) -> unreachable
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 24446c8578e0..273047279e90 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -14,6 +14,8 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -60,56 +62,6 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
return false;
}
-/// FindScalarElement - Given a vector and an element number, see if the scalar
-/// value is already around as a register, for example if it were inserted then
-/// extracted from the vector.
-static Value *FindScalarElement(Value *V, unsigned EltNo) {
- assert(V->getType()->isVectorTy() && "Not looking at a vector?");
- VectorType *VTy = cast<VectorType>(V->getType());
- unsigned Width = VTy->getNumElements();
- if (EltNo >= Width) // Out of range access.
- return UndefValue::get(VTy->getElementType());
-
- if (Constant *C = dyn_cast<Constant>(V))
- return C->getAggregateElement(EltNo);
-
- if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
- // If this is an insert to a variable element, we don't know what it is.
- if (!isa<ConstantInt>(III->getOperand(2)))
- return nullptr;
- unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
- // If this is an insert to the element we are looking for, return the
- // inserted value.
- if (EltNo == IIElt)
- return III->getOperand(1);
-
- // Otherwise, the insertelement doesn't modify the value, recurse on its
- // vector input.
- return FindScalarElement(III->getOperand(0), EltNo);
- }
-
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
- unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
- int InEl = SVI->getMaskValue(EltNo);
- if (InEl < 0)
- return UndefValue::get(VTy->getElementType());
- if (InEl < (int)LHSWidth)
- return FindScalarElement(SVI->getOperand(0), InEl);
- return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
- }
-
- // Extract a value from a vector add operation with a constant zero.
- Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
- if (Con->getAggregateElement(EltNo)->isNullValue())
- return FindScalarElement(Val, EltNo);
- }
-
- // Otherwise, we don't know.
- return nullptr;
-}
-
// If we have a PHI node with a vector type that has only 2 uses: feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
@@ -178,6 +130,10 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ if (Value *V = SimplifyExtractElementInst(
+ EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(EI, V);
+
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
@@ -190,10 +146,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
- // If this is extracting an invalid index, turn this into undef, to avoid
- // crashing the code below.
- if (IndexVal >= VectorWidth)
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ // InstSimplify handles cases where the index is invalid.
+ assert(IndexVal < VectorWidth);
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
@@ -209,16 +163,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
}
- if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
- return ReplaceInstUsesWith(EI, Elt);
-
  // If this extractelement is directly using a bitcast from a vector of
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
- if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 2a81689f7449..fd34a244f271 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2174,16 +2174,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
if (!EV.hasIndices())
return ReplaceInstUsesWith(EV, Agg);
- if (Constant *C = dyn_cast<Constant>(Agg)) {
- if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) {
- if (EV.getNumIndices() == 0)
- return ReplaceInstUsesWith(EV, C2);
- // Extract the remaining indices out of the constant indexed by the
- // first index
- return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
- }
- return nullptr; // Can't handle other constants
- }
+ if (Value *V =
+ SimplifyExtractValueInst(Agg, EV.getIndices(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(EV, V);
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
// We're extracting from an insertvalue instruction, compare the indices
@@ -2972,8 +2965,9 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
static bool
combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
- AssumptionCache &AC, TargetLibraryInfo &TLI,
- DominatorTree &DT, LoopInfo *LI = nullptr) {
+ AliasAnalysis *AA, AssumptionCache &AC,
+ TargetLibraryInfo &TLI, DominatorTree &DT,
+ LoopInfo *LI = nullptr) {
// Minimizing size?
bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
auto &DL = F.getParent()->getDataLayout();
@@ -2998,7 +2992,8 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist))
Changed = true;
- InstCombiner IC(Worklist, &Builder, MinimizeSize, &AC, &TLI, &DT, DL, LI);
+ InstCombiner IC(Worklist, &Builder, MinimizeSize,
+ AA, &AC, &TLI, &DT, DL, LI);
if (IC.run())
Changed = true;
@@ -3017,7 +3012,8 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto *LI = AM->getCachedResult<LoopAnalysis>(F);
- if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI))
+ // FIXME: The AliasAnalysis is not yet supported in the new pass manager
+ if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3050,6 +3046,7 @@ public:
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
@@ -3061,6 +3058,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
return false;
// Required analyses.
+ auto AA = &getAnalysis<AliasAnalysis>();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -3069,7 +3067,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI);
+ return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, LI);
}
char InstructionCombiningPass::ID = 0;
@@ -3078,6 +3076,7 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 60903c8b4aaf..d1eba6e70e57 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -656,11 +656,14 @@ namespace {
LeaderTableEntry* Prev = nullptr;
LeaderTableEntry* Curr = &LeaderTable[N];
- while (Curr->Val != I || Curr->BB != BB) {
+ while (Curr && (Curr->Val != I || Curr->BB != BB)) {
Prev = Curr;
Curr = Curr->Next;
}
+ if (!Curr)
+ return;
+
if (Prev) {
Prev->Next = Curr->Next;
} else {
@@ -1304,11 +1307,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (V->getType()->getScalarType()->isPointerTy()) {
AliasAnalysis *AA = gvn.getAliasAnalysis();
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- AA->copyValue(LI, NewPHIs[i]);
-
- // Now that we've copied information to the new PHIs, scan through
- // them again and inform alias analysis that we've added potentially
+ // Scan the new PHIs and inform alias analysis that we've added potentially
// escaping uses to any values that are operands to these PHIs.
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
PHINode *P = NewPHIs[i];
@@ -1796,7 +1795,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// In general, GVN unifies expressions over different control-flow
// regions, and so we need a conservative combination of the noalias
// scopes.
- unsigned KnownIDs[] = {
+ static const unsigned KnownIDs[] = {
LLVMContext::MD_tbaa,
LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias,
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6f0375487af6..2a954d9961f2 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -135,6 +136,10 @@ namespace {
PHINode *IndVar, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L);
+
+ Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
+ Instruction *InsertPt, Type *Ty,
+ bool &IsHighCostExpansion);
};
}
@@ -496,6 +501,52 @@ struct RewritePhi {
};
}
+Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
+ Loop *L, Instruction *InsertPt,
+ Type *ResultTy,
+ bool &IsHighCostExpansion) {
+ using namespace llvm::PatternMatch;
+
+ if (!Rewriter.isHighCostExpansion(S, L)) {
+ IsHighCostExpansion = false;
+ return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
+ }
+
+ // Before expanding S into an expensive LLVM expression, see if we can use an
+ // already existing value as the expansion for S. There is potential to make
+ // this significantly smarter, but this simple heuristic already gets some
+ // interesting cases.
+
+ SmallVector<BasicBlock *, 4> Latches;
+ L->getLoopLatches(Latches);
+
+ for (BasicBlock *BB : Latches) {
+ ICmpInst::Predicate Pred;
+ Instruction *LHS, *RHS;
+ BasicBlock *TrueBB, *FalseBB;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+ TrueBB, FalseBB)))
+ continue;
+
+ if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) {
+ IsHighCostExpansion = false;
+ return LHS;
+ }
+
+ if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) {
+ IsHighCostExpansion = false;
+ return RHS;
+ }
+ }
+
+ // We didn't find anything, fall back to using SCEVExpander.
+ assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!");
+ IsHighCostExpansion = true;
+ return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
+}
+
//===----------------------------------------------------------------------===//
// RewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
@@ -628,7 +679,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
continue;
}
- Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
+ bool HighCost = false;
+ Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst,
+ PN->getType(), HighCost);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
@@ -637,7 +690,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
DeadInsts.push_back(ExitVal);
continue;
}
- bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L);
// Collect all the candidate PHINodes to be rewritten.
RewritePhiSet.push_back(
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index f0e6d641b180..43fc50e588f8 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -602,7 +602,8 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
while (!I.use_empty()) {
- Instruction *User = I.user_back();
+ Value::user_iterator UI = I.user_begin();
+ auto *User = cast<Instruction>(*UI);
if (!DT->isReachableFromEntry(User->getParent())) {
User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
continue;
@@ -610,6 +611,16 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
// The user must be a PHI node.
PHINode *PN = cast<PHINode>(User);
+ // Surprisingly, instructions can be used outside of loops without any
+ // exits. This can only happen in PHI nodes if the incoming block is
+ // unreachable.
+ Use &U = UI.getUse();
+ BasicBlock *BB = PN->getIncomingBlock(U);
+ if (!DT->isReachableFromEntry(BB)) {
+ U = UndefValue::get(I.getType());
+ continue;
+ }
+
BasicBlock *ExitBlock = PN->getParent();
assert(ExitBlockSet.count(ExitBlock) &&
"The LCSSA PHI is not in an exit block!");
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index 0325d268c325..1b9859b57790 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <list>
#define LDIST_NAME "loop-distribute"
@@ -55,70 +56,6 @@ static cl::opt<bool> DistributeNonIfConvertible(
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
-/// \brief Remaps instructions in a loop including the preheader.
-static void remapInstructionsInLoop(const SmallVectorImpl<BasicBlock *> &Blocks,
- ValueToValueMapTy &VMap) {
- // Rewrite the code to refer to itself.
- for (auto *BB : Blocks)
- for (auto &Inst : *BB)
- RemapInstruction(&Inst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
-}
-
-/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
-/// Blocks.
-///
-/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
-/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
-static Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
- Loop *OrigLoop, ValueToValueMapTy &VMap,
- const Twine &NameSuffix, LoopInfo *LI,
- DominatorTree *DT,
- SmallVectorImpl<BasicBlock *> &Blocks) {
- Function *F = OrigLoop->getHeader()->getParent();
- Loop *ParentLoop = OrigLoop->getParentLoop();
-
- Loop *NewLoop = new Loop();
- if (ParentLoop)
- ParentLoop->addChildLoop(NewLoop);
- else
- LI->addTopLevelLoop(NewLoop);
-
- BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
- BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
- // To rename the loop PHIs.
- VMap[OrigPH] = NewPH;
- Blocks.push_back(NewPH);
-
- // Update LoopInfo.
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewPH, *LI);
-
- // Update DominatorTree.
- DT->addNewBlock(NewPH, LoopDomBB);
-
- for (BasicBlock *BB : OrigLoop->getBlocks()) {
- BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
- VMap[BB] = NewBB;
-
- // Update LoopInfo.
- NewLoop->addBasicBlockToLoop(NewBB, *LI);
-
- // Update DominatorTree.
- BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
- DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
-
- Blocks.push_back(NewBB);
- }
-
- // Move them physically from the end of the block list.
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH);
- F->getBasicBlockList().splice(Before, F->getBasicBlockList(),
- NewLoop->getHeader(), F->end());
-
- return NewLoop;
-}
-
namespace {
/// \brief Maintains the set of instructions of the loop for a partition before
/// cloning. After cloning, it hosts the new loop.
@@ -204,7 +141,9 @@ public:
ValueToValueMapTy &getVMap() { return VMap; }
/// \brief Remaps the cloned instructions using VMap.
- void remapInstructions() { remapInstructionsInLoop(ClonedLoopBlocks, VMap); }
+ void remapInstructions() {
+ remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
+ }
/// \brief Based on the set of instructions selected for this partition,
/// removes the unnecessary ones.
@@ -493,15 +432,14 @@ public:
/// partitions its entry is set to -1.
SmallVector<int, 8>
computePartitionSetForPointers(const LoopAccessInfo &LAI) {
- const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck =
- LAI.getRuntimePointerCheck();
+ const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
unsigned N = RtPtrCheck->Pointers.size();
SmallVector<int, 8> PtrToPartitions(N);
for (unsigned I = 0; I < N; ++I) {
- Value *Ptr = RtPtrCheck->Pointers[I];
+ Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
auto Instructions =
- LAI.getInstructionsForAccess(Ptr, RtPtrCheck->IsWritePtr[I]);
+ LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
int &Partition = PtrToPartitions[I];
// First set it to uninitialized.
@@ -629,121 +567,6 @@ private:
AccessesType Accesses;
};
-/// \brief Handles the loop versioning based on memchecks.
-class LoopVersioning {
-public:
- LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT,
- const SmallVector<int, 8> *PtrToPartition = nullptr)
- : VersionedLoop(L), NonVersionedLoop(nullptr),
- PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {}
-
- /// \brief Returns true if we need memchecks to disambiguate may-aliasing
- /// accesses.
- bool needsRuntimeChecks() const {
- return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition);
- }
-
- /// \brief Performs the CFG manipulation part of versioning the loop including
- /// the DominatorTree and LoopInfo updates.
- void versionLoop(Pass *P) {
- Instruction *FirstCheckInst;
- Instruction *MemRuntimeCheck;
- // Add the memcheck in the original preheader (this is empty initially).
- BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader();
- std::tie(FirstCheckInst, MemRuntimeCheck) =
- LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition);
- assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
-
- // Rename the block to make the IR more readable.
- MemCheckBB->setName(VersionedLoop->getHeader()->getName() +
- ".lver.memcheck");
-
- // Create empty preheader for the loop (and after cloning for the
- // non-versioned loop).
- BasicBlock *PH =
- SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
- PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
-
- // Clone the loop including the preheader.
- //
- // FIXME: This does not currently preserve SimplifyLoop because the exit
- // block is a join between the two loops.
- SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
- NonVersionedLoop =
- cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap,
- ".lver.orig", LI, DT, NonVersionedLoopBlocks);
- remapInstructionsInLoop(NonVersionedLoopBlocks, VMap);
-
- // Insert the conditional branch based on the result of the memchecks.
- Instruction *OrigTerm = MemCheckBB->getTerminator();
- BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
- OrigTerm);
- OrigTerm->eraseFromParent();
-
- // The loops merge in the original exit block. This is now dominated by the
- // memchecking block.
- DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
- }
-
- /// \brief Adds the necessary PHI nodes for the versioned loops based on the
- /// loop-defined values used outside of the loop.
- void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
- BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
- assert(PHIBlock && "No single successor to loop exit block");
-
- for (auto *Inst : DefsUsedOutside) {
- auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]);
- PHINode *PN;
-
- // First see if we have a single-operand PHI with the value defined by the
- // original loop.
- for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- assert(PN->getNumOperands() == 1 &&
- "Exit block should only have on predecessor");
- if (PN->getIncomingValue(0) == Inst)
- break;
- }
- // If not create it.
- if (!PN) {
- PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
- PHIBlock->begin());
- for (auto *User : Inst->users())
- if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
- User->replaceUsesOfWith(Inst, PN);
- PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
- }
- // Add the new incoming value from the non-versioned loop.
- PN->addIncoming(NonVersionedLoopInst,
- NonVersionedLoop->getExitingBlock());
- }
- }
-
-private:
- /// \brief The original loop. This becomes the "versioned" one, i.e. control
- /// goes if the memchecks all pass.
- Loop *VersionedLoop;
- /// \brief The fall-back loop, i.e. if any of the memchecks fail.
- Loop *NonVersionedLoop;
-
- /// \brief For each memory pointer it contains the partitionId it is used in.
- /// If nullptr, no partitioning is used.
- ///
- /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck().
- /// If the pointer is used in multiple partitions the entry is set to -1.
- const SmallVector<int, 8> *PtrToPartition;
-
- /// \brief This maps the instructions from VersionedLoop to their counterpart
- /// in NonVersionedLoop.
- ValueToValueMapTy VMap;
-
- /// \brief Analyses used.
- const LoopAccessInfo &LAI;
- LoopInfo *LI;
- DominatorTree *DT;
-};
-
/// \brief Returns the instructions that use values defined in the loop.
static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) {
SmallVector<Instruction *, 8> UsedOutside;
@@ -929,7 +752,7 @@ private:
LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition);
if (LVer.needsRuntimeChecks()) {
DEBUG(dbgs() << "\nPointers:\n");
- DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition));
+ DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition));
LVer.versionLoop(this);
LVer.addPHINodes(DefsUsedOutside);
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 714ce914a8b3..a21ca2417ca1 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -508,7 +508,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
ICmpInst *NewPreCond =
cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
- PreCond->replaceAllUsesWith(NewPreCond);
+ PreCondBr->setCondition(NewPreCond);
RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
}
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 25546553fd4d..9d7e57ffebac 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -282,21 +282,21 @@ static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
  DEBUG(dbgs() << "populateWorklist called\n");
LoopVector LoopList;
Loop *CurrentLoop = &L;
- std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector();
- while (vec.size() != 0) {
+ const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
+ while (!Vec->empty()) {
// The current loop has multiple subloops in it hence it is not tightly
// nested.
// Discard all loops above it added into Worklist.
- if (vec.size() != 1) {
+ if (Vec->size() != 1) {
LoopList.clear();
return;
}
LoopList.push_back(CurrentLoop);
- CurrentLoop = *(vec.begin());
- vec = CurrentLoop->getSubLoopsVector();
+ CurrentLoop = Vec->front();
+ Vec = &CurrentLoop->getSubLoops();
}
LoopList.push_back(CurrentLoop);
- V.push_back(LoopList);
+ V.push_back(std::move(LoopList));
}
static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 9e7558d9c45f..d78db6c369b3 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -840,8 +840,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
- if (HasRuntimeUnrollDisablePragma(L)) {
+ bool AllowRuntime =
+ (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime);
+ // Don't unroll a runtime trip count loop with unroll full pragma.
+ if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 243db8d70ca2..643f3740eedd 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -301,10 +301,6 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
// Merged instruction
Instruction *HoistedInst = HoistCand->clone();
- // Notify AA of the new value.
- if (isa<LoadInst>(HoistCand))
- AA->copyValue(HoistCand, HoistedInst);
-
// Hoist instruction.
HoistedInst->insertBefore(HoistPt);
@@ -451,9 +447,6 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
if (NewPN->getType()->getScalarType()->isPointerTy()) {
- // Notify AA of the new value.
- AA->copyValue(Opd1, NewPN);
- AA->copyValue(Opd2, NewPN);
// AA needs to be informed when a PHI-use of the pointer value is added
for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
unsigned J = PHINode::getOperandNumForIncomingValue(I);
@@ -491,7 +484,6 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
// Create the new store to be inserted at the join point.
StoreInst *SNew = (StoreInst *)(S0->clone());
Instruction *ANew = A0->clone();
- AA->copyValue(S0, SNew);
SNew->insertBefore(InsertPt);
ANew->insertBefore(SNew);
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 9ecaf102574a..366301ad731a 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -399,8 +399,8 @@ static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
// at least if they do, are leaf functions that cause only finite stack
// growth. In particular, the optimizer likes to form things like memsets
// out of stores in the original IR. Another important example is
- // llvm.frameescape which must occur in the entry block. Inserting a
- // safepoint before it is not legal since it could push the frameescape
+ // llvm.localescape which must occur in the entry block. Inserting a
+ // safepoint before it is not legal since it could push the localescape
// out of the entry block.
return true;
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 305175ff8f73..4d3a708fa20e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1799,11 +1799,10 @@ bool IPSCCP::runOnModule(Module &M) {
if (!TI->use_empty())
TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
TI->eraseFromParent();
+ new UnreachableInst(M.getContext(), BB);
if (&*BB != &F->front())
BlocksToErase.push_back(BB);
- else
- new UnreachableInst(M.getContext(), BB);
continue;
}
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 056dd11b5ab3..d1a0a82b9b08 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2593,13 +2593,21 @@ private:
V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
canConvertValue(DL, NewAllocaTy, LI.getType())) {
- V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(),
- LI.getName());
+ LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), LI.getName());
+ if (LI.isVolatile())
+ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+ V = NewLI;
} else {
Type *LTy = TargetTy->getPointerTo();
- V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
- getSliceAlign(TargetTy), LI.isVolatile(),
- LI.getName());
+ LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
+ getSliceAlign(TargetTy),
+ LI.isVolatile(), LI.getName());
+ if (LI.isVolatile())
+ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+ V = NewLI;
IsPtrAdjusted = true;
}
V = convertValue(DL, IRB, V, TargetTy);
@@ -2722,7 +2730,8 @@ private:
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
- (void)NewSI;
+ if (SI.isVolatile())
+ NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
Pass.DeadInsts.insert(&SI);
deleteIfTriviallyDead(OldOp);
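
Both SROA hunks apply the same pattern: when a volatile access is re-created, any atomic ordering and synchronization scope it carried must be copied to the replacement, since a volatile access may also be atomic. A minimal sketch of that pattern, assuming the IRBuilder/LoadInst APIs of this LLVM revision (the helper name is hypothetical):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Re-create a load at a new pointer, preserving volatility and, for
    // volatile accesses, the atomic ordering and synchronization scope.
    static LoadInst *recreateLoad(IRBuilder<> &IRB, LoadInst &Old,
                                  Value *NewPtr, unsigned Align) {
      LoadInst *New =
          IRB.CreateAlignedLoad(NewPtr, Align, Old.isVolatile(), Old.getName());
      if (Old.isVolatile())
        New->setAtomic(Old.getOrdering(), Old.getSynchScope());
      return New;
    }
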
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 53471de6154c..ef7dacac79cb 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -440,8 +440,6 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
// Create the new PHI node, insert it into NewBB at the end of the block
PHINode *NewPHI =
PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
- if (AA)
- AA->copyValue(PN, NewPHI);
// NOTE! This loop walks backwards for a reason! First off, this minimizes
// the cost of removal if we end up removing a large number of values, and
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 470e2d09132e..716e655affb9 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMTransformUtils
LoopUnroll.cpp
LoopUnrollRuntime.cpp
LoopUtils.cpp
+ LoopVersioning.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 4f8d1dfbe5df..cc4d6c6fb192 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -720,3 +721,68 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
nullptr);
}
+
+/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
+void llvm::remapInstructionsInBlocks(
+ const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
+ // Rewrite the code to refer to itself.
+ for (auto *BB : Blocks)
+ for (auto &Inst : *BB)
+ RemapInstruction(&Inst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+}
+
+/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
+/// Blocks.
+///
+/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
+/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
+Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
+ Loop *OrigLoop, ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, LoopInfo *LI,
+ DominatorTree *DT,
+ SmallVectorImpl<BasicBlock *> &Blocks) {
+ Function *F = OrigLoop->getHeader()->getParent();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ Loop *NewLoop = new Loop();
+ if (ParentLoop)
+ ParentLoop->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
+ assert(OrigPH && "No preheader");
+ BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
+ // To rename the loop PHIs.
+ VMap[OrigPH] = NewPH;
+ Blocks.push_back(NewPH);
+
+ // Update LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, *LI);
+
+ // Update DominatorTree.
+ DT->addNewBlock(NewPH, LoopDomBB);
+
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
+ VMap[BB] = NewBB;
+
+ // Update LoopInfo.
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ // Update DominatorTree.
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+
+ Blocks.push_back(NewBB);
+ }
+
+ // Move them physically from the end of the block list.
+ F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH);
+ F->getBasicBlockList().splice(Before, F->getBasicBlockList(),
+ NewLoop->getHeader(), F->end());
+
+ return NewLoop;
+}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 56085579b61c..50ca6234d0b7 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -900,13 +900,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
if (auto *GO = dyn_cast<GlobalObject>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
- // of the global.
- if (GO->isDeclaration())
- return Align;
- // If the memory we set aside for the global may not be the memory used by
- // the final program then it is impossible for us to reliably enforce the
- // preferred alignment.
- if (GO->isWeakForLinker())
+ // of the global. If the memory we set aside for the global may not be the
+ // memory used by the final program then it is impossible for us to reliably
+ // enforce the preferred alignment.
+ if (!GO->isStrongDefinitionForLinker())
return Align;
if (GO->getAlignment() >= PrefAlign)
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 2e7d21cb171f..5c98043e4632 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -403,7 +403,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
PHINode *PN = cast<PHINode>(I);
PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
PN->getName()+".be", BETerminator);
- if (AA) AA->copyValue(PN, NewPN);
// Loop over the PHI node, moving all entries except the one for the
// preheader over to the new PHI node.
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
new file mode 100644
index 000000000000..832079d2cf63
--- /dev/null
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -0,0 +1,106 @@
+//===- LoopVersioning.cpp - Utility to version a loop ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a utility class to perform loop versioning. The versioned
+// loop speculates that otherwise may-aliasing memory accesses don't overlap and
+// emits checks to prove this.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+
+using namespace llvm;
+
+LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
+ DominatorTree *DT,
+ const SmallVector<int, 8> *PtrToPartition)
+ : VersionedLoop(L), NonVersionedLoop(nullptr),
+ PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {
+ assert(L->getExitBlock() && "No single exit block");
+ assert(L->getLoopPreheader() && "No preheader");
+}
+
+bool LoopVersioning::needsRuntimeChecks() const {
+ return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition);
+}
+
+void LoopVersioning::versionLoop(Pass *P) {
+ Instruction *FirstCheckInst;
+ Instruction *MemRuntimeCheck;
+ // Add the memcheck in the original preheader (this is empty initially).
+ BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader();
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition);
+ assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
+
+ // Rename the block to make the IR more readable.
+ MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck");
+
+ // Create empty preheader for the loop (and after cloning for the
+ // non-versioned loop).
+ BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
+ PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
+
+ // Clone the loop including the preheader.
+ //
+ // FIXME: This does not currently preserve SimplifyLoop because the exit
+ // block is a join between the two loops.
+ SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
+ NonVersionedLoop =
+ cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig",
+ LI, DT, NonVersionedLoopBlocks);
+ remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
+
+ // Insert the conditional branch based on the result of the memchecks.
+ Instruction *OrigTerm = MemCheckBB->getTerminator();
+ BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
+ OrigTerm);
+ OrigTerm->eraseFromParent();
+
+ // The loops merge in the original exit block. This is now dominated by the
+ // memchecking block.
+ DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
+}
+
+void LoopVersioning::addPHINodes(
+ const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
+ assert(PHIBlock && "No single successor to loop exit block");
+
+ for (auto *Inst : DefsUsedOutside) {
+ auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]);
+ PHINode *PN;
+
+ // First see if we have a single-operand PHI with the value defined by the
+ // original loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ assert(PN->getNumOperands() == 1 &&
+ "Exit block should only have on predecessor");
+ if (PN->getIncomingValue(0) == Inst)
+ break;
+ }
+ // If not, create it.
+ if (!PN) {
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
+ PHIBlock->begin());
+ for (auto *User : Inst->users())
+ if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
+ User->replaceUsesOfWith(Inst, PN);
+ PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
+ }
+ // Add the new incoming value from the non-versioned loop.
+ PN->addIncoming(NonVersionedLoopInst, NonVersionedLoop->getExitingBlock());
+ }
+}
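
For context, a rough sketch of how a client transform is expected to drive the new utility (caller-side code, not part of the patch; LAI, L, LI and DT are assumed to come from the caller's analyses, and findDefsUsedOutsideOfLoop is the helper from LoopUtils.h):

    // Version the loop only when memchecks are actually required.
    LoopVersioning LVer(LAI, L, LI, DT, /*PtrToPartition=*/nullptr);
    if (LVer.needsRuntimeChecks()) {
      // Emit the memchecks and the branch selecting between the versioned
      // loop and the unmodified fall-back copy ('this' assumes the caller
      // is itself a Pass).
      LVer.versionLoop(this);

      // Values defined in the loop and used outside now have two reaching
      // definitions; merge them with PHIs in the common exit block.
      LVer.addPHINodes(findDefsUsedOutsideOfLoop(L));
    }
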
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5ba14174ac79..69ca2688c810 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -148,8 +148,9 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
cl::desc("Maximum factor for an interleaved access group (default = 8)"),
cl::init(8));
-/// We don't unroll loops with a known constant trip count below this number.
-static const unsigned TinyTripCountUnrollThreshold = 128;
+/// We don't interleave loops with a known constant trip count below this
+/// number.
+static const unsigned TinyTripCountInterleaveThreshold = 128;
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
@@ -180,7 +181,8 @@ static cl::opt<unsigned> ForceTargetInstructionCost(
static cl::opt<unsigned> SmallLoopCost(
"small-loop-cost", cl::init(20), cl::Hidden,
- cl::desc("The cost of a loop that is considered 'small' by the unroller."));
+ cl::desc(
+ "The cost of a loop that is considered 'small' by the interleaver."));
static cl::opt<bool> LoopVectorizeWithBlockFrequency(
"loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden,
@@ -188,10 +190,11 @@ static cl::opt<bool> LoopVectorizeWithBlockFrequency(
"heuristics minimizing code growth in cold regions and being more "
"aggressive in hot regions."));
-// Runtime unroll loops for load/store throughput.
-static cl::opt<bool> EnableLoadStoreRuntimeUnroll(
- "enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden,
- cl::desc("Enable runtime unrolling until load/store ports are saturated"));
+// Runtime interleave loops for load/store throughput.
+static cl::opt<bool> EnableLoadStoreRuntimeInterleave(
+ "enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden,
+ cl::desc(
+ "Enable runtime interleaving until load/store ports are saturated"));
/// The number of stores in a loop that are allowed to need predication.
static cl::opt<unsigned> NumberOfStoresToPredicate(
@@ -200,15 +203,15 @@ static cl::opt<unsigned> NumberOfStoresToPredicate(
static cl::opt<bool> EnableIndVarRegisterHeur(
"enable-ind-var-reg-heur", cl::init(true), cl::Hidden,
- cl::desc("Count the induction variable only once when unrolling"));
+ cl::desc("Count the induction variable only once when interleaving"));
static cl::opt<bool> EnableCondStoresVectorization(
"enable-cond-stores-vec", cl::init(false), cl::Hidden,
cl::desc("Enable if predication of stores during vectorization."));
-static cl::opt<unsigned> MaxNestedScalarReductionUF(
- "max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden,
- cl::desc("The maximum unroll factor to use when unrolling a scalar "
+static cl::opt<unsigned> MaxNestedScalarReductionIC(
+ "max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden,
+ cl::desc("The maximum interleave count to use when interleaving a scalar "
"reduction in a nested loop."));
namespace {
@@ -921,8 +924,8 @@ public:
bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
/// Returns the information that we collected about runtime memory check.
- const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const {
- return LAI->getRuntimePointerCheck();
+ const RuntimePointerChecking *getRuntimePointerChecking() const {
+ return LAI->getRuntimePointerChecking();
}
const LoopAccessInfo *getLAI() const {
@@ -1105,12 +1108,19 @@ public:
/// 64 bit loop indices.
unsigned getWidestType();
+ /// \return The desired interleave count.
+ /// If interleave count has been specified by metadata it will be returned.
+ /// Otherwise, the interleave count is computed and returned. VF and LoopCost
+ /// are the selected vectorization factor and the cost of the selected VF.
+ unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
+ unsigned LoopCost);
+
/// \return The most profitable unroll factor.
- /// If UserUF is non-zero then this method finds the best unroll-factor
- /// based on register pressure and other parameters.
- /// VF and LoopCost are the selected vectorization factor and the cost of the
- /// selected VF.
- unsigned selectUnrollFactor(bool OptForSize, unsigned VF, unsigned LoopCost);
+ /// This method finds the best unroll-factor based on register pressure and
+ /// other parameters. VF and LoopCost are the selected vectorization factor
+ /// and the cost of the selected VF.
+ unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
+ unsigned LoopCost);
/// \brief A struct that represents some properties of the register usage
/// of a loop.
@@ -1456,9 +1466,14 @@ struct LoopVectorize : public FunctionPass {
const BranchProbability ColdProb(1, 5); // 20%
ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
- // If the target claims to have no vector registers don't attempt
- // vectorization.
- if (!TTI->getNumberOfRegisters(true))
+ // Don't attempt if
+ // 1. the target claims to have no vector registers, and
+ // 2. interleaving won't help ILP.
+ //
+ // The second condition is necessary because, even if the target has no
+ // vector registers, loop vectorization may still enable scalar
+ // interleaving.
+ if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
return false;
// Build up a worklist of inner-loops to vectorize. This is necessary as
@@ -1633,18 +1648,17 @@ struct LoopVectorize : public FunctionPass {
const LoopVectorizationCostModel::VectorizationFactor VF =
CM.selectVectorizationFactor(OptForSize);
- // Select the unroll factor.
- const unsigned UF =
- CM.selectUnrollFactor(OptForSize, VF.Width, VF.Cost);
+ // Select the interleave count.
+ unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
<< DebugLocStr << '\n');
- DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
+ DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n");
- if (UF == 1) {
+ if (IC == 1) {
emitOptimizationRemarkAnalysis(
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
"not beneficial to vectorize and user disabled interleaving");
@@ -1654,17 +1668,14 @@ struct LoopVectorize : public FunctionPass {
// Report the unrolling decision.
emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- Twine("unrolled with interleaving factor " +
- Twine(UF) +
+ Twine("interleaved by " + Twine(IC) +
" (vectorization not beneficial)"));
- // We decided not to vectorize, but we may want to unroll.
-
- InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF);
+ InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC);
Unroller.vectorize(&LVL);
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC);
LB.vectorize(&LVL);
++LoopsVectorized;
@@ -1675,10 +1686,10 @@ struct LoopVectorize : public FunctionPass {
AddRuntimeUnrollDisableMetaData(L);
// Report the vectorization decision.
- emitOptimizationRemark(
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
- Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) +
- ", unrolling interleave factor: " + Twine(UF) + ")");
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ Twine("vectorized loop (vectorization width: ") +
+ Twine(VF.Width) + ", interleaved count: " +
+ Twine(IC) + ")");
}
// Mark the loop as already vectorized to avoid vectorizing again.
@@ -1760,31 +1771,6 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
return Builder.CreateAdd(Val, Step, "induction");
}
-/// \brief Find the operand of the GEP that should be checked for consecutive
-/// stores. This ignores trailing indices that have no effect on the final
-/// pointer.
-static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
- const DataLayout &DL = Gep->getModule()->getDataLayout();
- unsigned LastOperand = Gep->getNumOperands() - 1;
- unsigned GEPAllocSize = DL.getTypeAllocSize(
- cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
-
- // Walk backwards and try to peel off zeros.
- while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
- // Find the type we're currently indexing into.
- gep_type_iterator GEPTI = gep_type_begin(Gep);
- std::advance(GEPTI, LastOperand - 1);
-
- // If it's a type with the same allocation size as the result of the GEP we
- // can peel off the zero index.
- if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
- break;
- --LastOperand;
- }
-
- return LastOperand;
-}
-
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
// Make sure that the pointer does not point to structs.
@@ -2503,9 +2489,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
*/
BasicBlock *OldBasicBlock = OrigLoop->getHeader();
- BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
+ BasicBlock *VectorPH = OrigLoop->getLoopPreheader();
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
- assert(BypassBlock && "Invalid loop structure");
+ assert(VectorPH && "Invalid loop structure");
assert(ExitBlock && "Must have an exit block");
// Some loops have a single integer induction variable, while other loops
@@ -2545,44 +2531,35 @@ void InnerLoopVectorizer::createEmptyLoop() {
// loop.
Value *BackedgeCount =
Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(),
- BypassBlock->getTerminator());
+ VectorPH->getTerminator());
if (BackedgeCount->getType()->isPointerTy())
BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy,
"backedge.ptrcnt.to.int",
- BypassBlock->getTerminator());
+ VectorPH->getTerminator());
Instruction *CheckBCOverflow =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount,
Constant::getAllOnesValue(BackedgeCount->getType()),
- "backedge.overflow", BypassBlock->getTerminator());
+ "backedge.overflow", VectorPH->getTerminator());
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
// then we know that it starts at zero.
- Builder.SetInsertPoint(BypassBlock->getTerminator());
- Value *StartIdx = ExtendedIdx = OldInduction ?
- Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
- IdxTy):
- ConstantInt::get(IdxTy, 0);
-
- // We need an instruction to anchor the overflow check on. StartIdx needs to
- // be defined before the overflow check branch. Because the scalar preheader
- // is going to merge the start index and so the overflow branch block needs to
- // contain a definition of the start index.
- Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd(
- StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor",
- BypassBlock->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ Value *StartIdx = ExtendedIdx =
+ OldInduction
+ ? Builder.CreateZExt(OldInduction->getIncomingValueForBlock(VectorPH),
+ IdxTy)
+ : ConstantInt::get(IdxTy, 0);
// Count holds the overall loop count (N).
Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- BypassBlock->getTerminator());
+ VectorPH->getTerminator());
- LoopBypassBlocks.push_back(BypassBlock);
+ LoopBypassBlocks.push_back(VectorPH);
// Split the single block loop into the two loop structure described above.
- BasicBlock *VectorPH =
- BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
BasicBlock *VecBody =
- VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
BasicBlock *MiddleBlock =
VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
BasicBlock *ScalarPH =
@@ -2597,7 +2574,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
if (ParentLoop) {
ParentLoop->addChildLoop(Lp);
ParentLoop->addBasicBlockToLoop(ScalarPH, *LI);
- ParentLoop->addBasicBlockToLoop(VectorPH, *LI);
ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI);
} else {
LI->addTopLevelLoop(Lp);
@@ -2615,9 +2591,20 @@ void InnerLoopVectorizer::createEmptyLoop() {
// times the unroll factor (num of SIMD instructions).
Constant *Step = ConstantInt::get(IdxTy, VF * UF);
+ // Generate code to check that the loop's trip count that we computed by
+ // adding one to the backedge-taken count will not overflow.
+ BasicBlock *NewVectorPH =
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
+ ReplaceInstWithInst(
+ VectorPH->getTerminator(),
+ BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow));
+ VectorPH = NewVectorPH;
+
// This is the IR builder that we use to add all of the logic for bypassing
// the new vector loop.
- IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
+ IRBuilder<> BypassBuilder(VectorPH->getTerminator());
setDebugLocFromInst(BypassBuilder,
getDebugLocFromInstOrOperands(OldInduction));
@@ -2646,24 +2633,14 @@ void InnerLoopVectorizer::createEmptyLoop() {
// jump to the scalar loop.
Value *Cmp =
BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
-
- BasicBlock *LastBypassBlock = BypassBlock;
-
- // Generate code to check that the loops trip count that we computed by adding
- // one to the backedge-taken count will not overflow.
- {
- auto PastOverflowCheck =
- std::next(BasicBlock::iterator(OverflowCheckAnchor));
- BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
- LoopBypassBlocks.push_back(CheckBlock);
- ReplaceInstWithInst(
- LastBypassBlock->getTerminator(),
- BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow));
- LastBypassBlock = CheckBlock;
- }
+ NewVectorPH =
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
+ LoopBypassBlocks.push_back(VectorPH);
+ ReplaceInstWithInst(VectorPH->getTerminator(),
+ BranchInst::Create(MiddleBlock, NewVectorPH, Cmp));
+ VectorPH = NewVectorPH;
// Generate the code to check that the strides we assumed to be one are really
// one. We want the new basic block to start at the first instruction in a
@@ -2671,23 +2648,24 @@ void InnerLoopVectorizer::createEmptyLoop() {
Instruction *StrideCheck;
Instruction *FirstCheckInst;
std::tie(FirstCheckInst, StrideCheck) =
- addStrideCheck(LastBypassBlock->getTerminator());
+ addStrideCheck(VectorPH->getTerminator());
if (StrideCheck) {
AddedSafetyChecks = true;
// Create a new block containing the stride check.
- BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
+ VectorPH->setName("vector.stridecheck");
+ NewVectorPH =
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
- LoopBypassBlocks.push_back(CheckBlock);
+ ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
+ LoopBypassBlocks.push_back(VectorPH);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- ReplaceInstWithInst(LastBypassBlock->getTerminator(),
- BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
+ ReplaceInstWithInst(
+ VectorPH->getTerminator(),
+ BranchInst::Create(MiddleBlock, NewVectorPH, StrideCheck));
- Cmp = StrideCheck;
- LastBypassBlock = CheckBlock;
+ VectorPH = NewVectorPH;
}
// Generate the code that checks in runtime if arrays overlap. We put the
@@ -2695,28 +2673,26 @@ void InnerLoopVectorizer::createEmptyLoop() {
// faster.
Instruction *MemRuntimeCheck;
std::tie(FirstCheckInst, MemRuntimeCheck) =
- Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
+ Legal->getLAI()->addRuntimeCheck(VectorPH->getTerminator());
if (MemRuntimeCheck) {
AddedSafetyChecks = true;
// Create a new block containing the memory check.
- BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
+ VectorPH->setName("vector.memcheck");
+ NewVectorPH =
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
- LoopBypassBlocks.push_back(CheckBlock);
+ ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
+ LoopBypassBlocks.push_back(VectorPH);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- ReplaceInstWithInst(LastBypassBlock->getTerminator(),
- BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
+ ReplaceInstWithInst(
+ VectorPH->getTerminator(),
+ BranchInst::Create(MiddleBlock, NewVectorPH, MemRuntimeCheck));
- Cmp = MemRuntimeCheck;
- LastBypassBlock = CheckBlock;
+ VectorPH = NewVectorPH;
}
- ReplaceInstWithInst(LastBypassBlock->getTerminator(),
- BranchInst::Create(MiddleBlock, VectorPH, Cmp));
-
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
// PHIs that are left in the scalar version of the loop.
@@ -3831,7 +3807,7 @@ bool LoopVectorizationLegality::canVectorize() {
}
// We can only vectorize innermost loops.
- if (!TheLoop->getSubLoopsVector().empty()) {
+ if (!TheLoop->empty()) {
emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
return false;
}
@@ -3897,10 +3873,11 @@ bool LoopVectorizationLegality::canVectorize() {
// Collect all of the variables that remain uniform after vectorization.
collectLoopUniforms();
- DEBUG(dbgs() << "LV: We can vectorize this loop" <<
- (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
- "")
- <<"!\n");
+ DEBUG(dbgs() << "LV: We can vectorize this loop"
+ << (LAI->getRuntimePointerChecking()->Need
+ ? " (with a runtime bound check)"
+ : "")
+ << "!\n");
// Analyze interleaved memory accesses.
if (EnableInterleavedMemAccesses)
@@ -4130,118 +4107,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return true;
}
-///\brief Remove GEPs whose indices but the last one are loop invariant and
-/// return the induction operand of the gep pointer.
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP)
- return Ptr;
-
- unsigned InductionOperand = getGEPInductionOperand(GEP);
-
- // Check that all of the gep indices are uniform except for our induction
- // operand.
- for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
- if (i != InductionOperand &&
- !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
- return Ptr;
- return GEP->getOperand(InductionOperand);
-}
-
-///\brief Look for a cast use of the passed value.
-static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
- Value *UniqueCast = nullptr;
- for (User *U : Ptr->users()) {
- CastInst *CI = dyn_cast<CastInst>(U);
- if (CI && CI->getType() == Ty) {
- if (!UniqueCast)
- UniqueCast = CI;
- else
- return nullptr;
- }
- }
- return UniqueCast;
-}
-
-///\brief Get the stride of a pointer access in a loop.
-/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a
-/// pointer to the Value, or null otherwise.
-static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
- const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
- if (!PtrTy || PtrTy->isAggregateType())
- return nullptr;
-
- // Try to remove a gep instruction to make the pointer (actually index at this
- // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the
- // pointer, otherwise, we are analyzing the index.
- Value *OrigPtr = Ptr;
-
- // The size of the pointer access.
- int64_t PtrAccessSize = 1;
-
- Ptr = stripGetElementPtr(Ptr, SE, Lp);
- const SCEV *V = SE->getSCEV(Ptr);
-
- if (Ptr != OrigPtr)
- // Strip off casts.
- while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
- V = C->getOperand();
-
- const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
- if (!S)
- return nullptr;
-
- V = S->getStepRecurrence(*SE);
- if (!V)
- return nullptr;
-
- // Strip off the size of access multiplication if we are still analyzing the
- // pointer.
- if (OrigPtr == Ptr) {
- const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
- DL.getTypeAllocSize(PtrTy->getElementType());
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
- if (M->getOperand(0)->getSCEVType() != scConstant)
- return nullptr;
-
- const APInt &APStepVal =
- cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return nullptr;
-
- int64_t StepVal = APStepVal.getSExtValue();
- if (PtrAccessSize != StepVal)
- return nullptr;
- V = M->getOperand(1);
- }
- }
-
- // Strip off casts.
- Type *StripedOffRecurrenceCast = nullptr;
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
- StripedOffRecurrenceCast = C->getType();
- V = C->getOperand();
- }
-
- // Look for the loop invariant symbolic value.
- const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
- if (!U)
- return nullptr;
-
- Value *Stride = U->getValue();
- if (!Lp->isLoopInvariant(Stride))
- return nullptr;
-
- // If we have stripped off the recurrence cast we have to make sure that we
- // return the value that is used in this loop so that we can replace it later.
- if (StripedOffRecurrenceCast)
- Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
-
- return Stride;
-}
-
void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
Value *Ptr = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
@@ -4585,7 +4450,7 @@ LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// Width 1 means no vectorize
VectorizationFactor Factor = { 1U, 0U };
- if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+ if (OptForSize && Legal->getRuntimePointerChecking()->Need) {
emitAnalysis(VectorizationReport() <<
"runtime pointer checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
@@ -4745,41 +4610,40 @@ unsigned LoopVectorizationCostModel::getWidestType() {
return MaxWidth;
}
-unsigned
-LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
- unsigned VF,
- unsigned LoopCost) {
+unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
+ unsigned VF,
+ unsigned LoopCost) {
- // -- The unroll heuristics --
- // We unroll the loop in order to expose ILP and reduce the loop overhead.
+ // -- The interleave heuristics --
+ // We interleave the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
// at this level. For example, frontend pressure (on decode or fetch) due to
// code size, or the number and capabilities of the execution ports.
//
- // We use the following heuristics to select the unroll factor:
- // 1. If the code has reductions, then we unroll in order to break the cross
+ // We use the following heuristics to select the interleave count:
+ // 1. If the code has reductions, then we interleave to break the cross
// iteration dependency.
- // 2. If the loop is really small, then we unroll in order to reduce the loop
+ // 2. If the loop is really small, then we interleave to reduce the loop
// overhead.
- // 3. We don't unroll if we think that we will spill registers to memory due
- // to the increased register pressure.
+ // 3. We don't interleave if we think that we will spill registers to memory
+ // due to the increased register pressure.
// Use the user preference, unless 'auto' is selected.
int UserUF = Hints->getInterleave();
if (UserUF != 0)
return UserUF;
- // When we optimize for size, we don't unroll.
+ // When we optimize for size, we don't interleave.
if (OptForSize)
return 1;
- // We used the distance for the unroll factor.
+ // We used the distance for the interleave count.
if (Legal->getMaxSafeDepDistBytes() != -1U)
return 1;
- // Do not unroll loops with a relatively small trip count.
+ // Do not interleave loops with a relatively small trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop);
- if (TC > 1 && TC < TinyTripCountUnrollThreshold)
+ if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
@@ -4800,32 +4664,32 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
R.NumInstructions = std::max(R.NumInstructions, 1U);
- // We calculate the unroll factor using the following formula.
+ // We calculate the interleave count using the following formula.
// Subtract the number of loop invariants from the number of available
- // registers. These registers are used by all of the unrolled instances.
+ // registers. These registers are used by all of the interleaved instances.
// Next, divide the remaining registers by the number of registers that is
// required by the loop, in order to estimate how many parallel instances
// fit without causing spills. All of this is rounded down if necessary to be
- // a power of two. We want power of two unroll factors to simplify any
+ // a power of two. We want power of two interleave count to simplify any
// addressing operations or alignment considerations.
- unsigned UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
+ unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
R.MaxLocalUsers);
- // Don't count the induction variable as unrolled.
+ // Don't count the induction variable as interleaved.
if (EnableIndVarRegisterHeur)
- UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
+ IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
std::max(1U, (R.MaxLocalUsers - 1)));
- // Clamp the unroll factor ranges to reasonable factors.
- unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor(VF);
+ // Clamp the interleave ranges to reasonable counts.
+ unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
- // Check if the user has overridden the unroll max.
+ // Check if the user has overridden the max.
if (VF == 1) {
if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0)
- MaxInterleaveSize = ForceTargetMaxScalarInterleaveFactor;
+ MaxInterleaveCount = ForceTargetMaxScalarInterleaveFactor;
} else {
if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0)
- MaxInterleaveSize = ForceTargetMaxVectorInterleaveFactor;
+ MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor;
}
// If we did not calculate the cost for VF (because the user selected the VF)
@@ -4833,72 +4697,74 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
if (LoopCost == 0)
LoopCost = expectedCost(VF);
- // Clamp the calculated UF to be between the 1 and the max unroll factor
+ // Clamp the calculated IC to be between the 1 and the max interleave count
// that the target allows.
- if (UF > MaxInterleaveSize)
- UF = MaxInterleaveSize;
- else if (UF < 1)
- UF = 1;
+ if (IC > MaxInterleaveCount)
+ IC = MaxInterleaveCount;
+ else if (IC < 1)
+ IC = 1;
- // Unroll if we vectorized this loop and there is a reduction that could
- // benefit from unrolling.
+ // Interleave if we vectorized this loop and there is a reduction that could
+ // benefit from interleaving.
if (VF > 1 && Legal->getReductionVars()->size()) {
- DEBUG(dbgs() << "LV: Unrolling because of reductions.\n");
- return UF;
+ DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");
+ return IC;
}
// Note that if we've already vectorized the loop we will have done the
- // runtime check and so unrolling won't require further checks.
- bool UnrollingRequiresRuntimePointerCheck =
- (VF == 1 && Legal->getRuntimePointerCheck()->Need);
+ // runtime check and so interleaving won't require further checks.
+ bool InterleavingRequiresRuntimePointerCheck =
+ (VF == 1 && Legal->getRuntimePointerChecking()->Need);
- // We want to unroll small loops in order to reduce the loop overhead and
+ // We want to interleave small loops in order to reduce the loop overhead and
// potentially expose ILP opportunities.
DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
- if (!UnrollingRequiresRuntimePointerCheck &&
- LoopCost < SmallLoopCost) {
+ if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) {
// We assume that the cost overhead is 1 and we use the cost model
- // to estimate the cost of the loop and unroll until the cost of the
+ // to estimate the cost of the loop and interleave until the cost of the
// loop overhead is about 5% of the cost of the loop.
- unsigned SmallUF = std::min(UF, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));
+ unsigned SmallIC =
+ std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));
- // Unroll until store/load ports (estimated by max unroll factor) are
+ // Interleave until store/load ports (estimated by max interleave count) are
// saturated.
unsigned NumStores = Legal->getNumStores();
unsigned NumLoads = Legal->getNumLoads();
- unsigned StoresUF = UF / (NumStores ? NumStores : 1);
- unsigned LoadsUF = UF / (NumLoads ? NumLoads : 1);
+ unsigned StoresIC = IC / (NumStores ? NumStores : 1);
+ unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1);
// If we have a scalar reduction (vector reductions are already dealt with
// by this point), we can increase the critical path length if the loop
- // we're unrolling is inside another loop. Limit, by default to 2, so the
+ // we're interleaving is inside another loop. Limit, by default to 2, so the
// critical path only gets increased by one reduction operation.
if (Legal->getReductionVars()->size() &&
TheLoop->getLoopDepth() > 1) {
- unsigned F = static_cast<unsigned>(MaxNestedScalarReductionUF);
- SmallUF = std::min(SmallUF, F);
- StoresUF = std::min(StoresUF, F);
- LoadsUF = std::min(LoadsUF, F);
+ unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC);
+ SmallIC = std::min(SmallIC, F);
+ StoresIC = std::min(StoresIC, F);
+ LoadsIC = std::min(LoadsIC, F);
}
- if (EnableLoadStoreRuntimeUnroll && std::max(StoresUF, LoadsUF) > SmallUF) {
- DEBUG(dbgs() << "LV: Unrolling to saturate store or load ports.\n");
- return std::max(StoresUF, LoadsUF);
+ if (EnableLoadStoreRuntimeInterleave &&
+ std::max(StoresIC, LoadsIC) > SmallIC) {
+ DEBUG(dbgs() << "LV: Interleaving to saturate store or load ports.\n");
+ return std::max(StoresIC, LoadsIC);
}
- DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n");
- return SmallUF;
+ DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n");
+ return SmallIC;
}
- // Unroll if this is a large loop (small loops are already dealt with by this
- // point) that could benefit from interleaved unrolling.
+ // Interleave if this is a large loop (small loops are already dealt with by
+ // this point) that could benefit from interleaving.
bool HasReductions = (Legal->getReductionVars()->size() > 0);
if (TTI.enableAggressiveInterleaving(HasReductions)) {
- DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n");
- return UF;
+ DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n");
+ return IC;
}
- DEBUG(dbgs() << "LV: Not Unrolling.\n");
+ DEBUG(dbgs() << "LV: Not Interleaving.\n");
return 1;
}
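
As a concrete illustration of the register-pressure formula used by selectInterleaveCount (the numbers are made up, not taken from any target; PowerOf2Floor comes from llvm/Support/MathExtras.h):

    // IC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers)
    unsigned TargetNumRegisters = 16; // vector registers exposed by the target
    unsigned LoopInvariantRegs = 3;   // values live across all interleaved copies
    unsigned MaxLocalUsers = 5;       // peak per-iteration register demand
    unsigned IC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) /
                                MaxLocalUsers);    // (16 - 3) / 5 = 2, so IC = 2
    // With EnableIndVarRegisterHeur the induction variable is discounted:
    // (16 - 3 - 1) / max(1, 5 - 1) = 3, and PowerOf2Floor(3) = 2, so still 2.
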
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7c4c279dcf4d..7bac407e77e9 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -69,8 +69,13 @@ static cl::opt<bool> ShouldStartVectorizeHorAtStore(
cl::desc(
"Attempt to vectorize horizontal reductions feeding into a store"));
+static cl::opt<int>
+MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
+ cl::desc("Attempt to vectorize for this register size in bits"));
+
namespace {
+// FIXME: Set this via cl::opt to allow overriding.
static const unsigned MinVecRegSize = 128;
static const unsigned RecursionMaxDepth = 12;
@@ -2136,9 +2141,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
// Prepare the operand vector.
- for (unsigned j = 0; j < E->Scalars.size(); ++j)
- Operands.push_back(cast<PHINode>(E->Scalars[j])->
- getIncomingValueForBlock(IBB));
+ for (Value *V : E->Scalars)
+ Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB));
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
@@ -2172,8 +2176,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
ValueList INVL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i)
- INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ for (Value *V : E->Scalars)
+ INVL.push_back(cast<Instruction>(V)->getOperand(0));
setInsertPointAfterBundle(E->Scalars);
@@ -2191,9 +2195,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::FCmp:
case Instruction::ICmp: {
ValueList LHSV, RHSV;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ for (Value *V : E->Scalars) {
+ LHSV.push_back(cast<Instruction>(V)->getOperand(0));
+ RHSV.push_back(cast<Instruction>(V)->getOperand(1));
}
setInsertPointAfterBundle(E->Scalars);
@@ -2217,10 +2221,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::Select: {
ValueList TrueVec, FalseVec, CondVec;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
- FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2));
+ for (Value *V : E->Scalars) {
+ CondVec.push_back(cast<Instruction>(V)->getOperand(0));
+ TrueVec.push_back(cast<Instruction>(V)->getOperand(1));
+ FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
}
setInsertPointAfterBundle(E->Scalars);
@@ -2259,9 +2263,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
else
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ for (Value *V : E->Scalars) {
+ LHSVL.push_back(cast<Instruction>(V)->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(V)->getOperand(1));
}
setInsertPointAfterBundle(E->Scalars);
@@ -2322,8 +2326,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned AS = SI->getPointerAddressSpace();
ValueList ValueOp;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i)
- ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand());
+ for (Value *V : E->Scalars)
+ ValueOp.push_back(cast<StoreInst>(V)->getValueOperand());
setInsertPointAfterBundle(E->Scalars);
@@ -2351,8 +2355,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E->Scalars);
ValueList Op0VL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i)
- Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
+ for (Value *V : E->Scalars)
+ Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0));
Value *Op0 = vectorizeTree(Op0VL);
@@ -2360,8 +2364,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
++j) {
ValueList OpVL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i)
- OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
+ for (Value *V : E->Scalars)
+ OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j));
Value *OpVec = vectorizeTree(OpVL);
OpVecs.push_back(OpVec);
@@ -2397,8 +2401,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
OpVecs.push_back(CEI->getArgOperand(j));
continue;
}
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- CallInst *CEI = cast<CallInst>(E->Scalars[i]);
+ for (Value *V : E->Scalars) {
+ CallInst *CEI = cast<CallInst>(V);
OpVL.push_back(CEI->getArgOperand(j));
}
@@ -3089,6 +3093,17 @@ struct SLPVectorizer : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
+ // Use the vector register size specified by the target unless overridden
+ // by a command-line option.
+ // TODO: It would be better to limit the vectorization factor based on
+ // data type rather than just register size. For example, x86 AVX has
+ // 256-bit registers, but it does not support integer operations
+ // at that width (that requires AVX2).
+ if (MaxVectorRegSizeOption.getNumOccurrences())
+ MaxVecRegSize = MaxVectorRegSizeOption;
+ else
+ MaxVecRegSize = TTI->getRegisterBitWidth(true);
+
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -3166,12 +3181,13 @@ private:
bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);
bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold,
- BoUpSLP &R);
+ BoUpSLP &R, unsigned VecRegSize);
bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold,
BoUpSLP &R);
private:
StoreListMap StoreRefs;
+ unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt.
};
/// \brief Check that the Values in the slice in VL array are still existent in
@@ -3186,14 +3202,15 @@ static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
}
bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
- int CostThreshold, BoUpSLP &R) {
+ int CostThreshold, BoUpSLP &R,
+ unsigned VecRegSize) {
unsigned ChainLen = Chain.size();
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
<< "\n");
Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout();
unsigned Sz = DL.getTypeSizeInBits(StoreTy);
- unsigned VF = MinVecRegSize / Sz;
+ unsigned VF = VecRegSize / Sz;
if (!isPowerOf2_32(Sz) || VF < 2)
return false;
@@ -3277,12 +3294,16 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
I = ConsecutiveChain[I];
}
- bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R);
-
- // Mark the vectorized stores so that we don't vectorize them again.
- if (Vectorized)
- VectorizedStores.insert(Operands.begin(), Operands.end());
- Changed |= Vectorized;
+ // FIXME: Is division-by-2 the correct step? Should we assert that the
+ // register size is a power-of-2?
+ for (unsigned Size = MaxVecRegSize; Size >= MinVecRegSize; Size /= 2) {
+ if (vectorizeStoreChain(Operands, costThreshold, R, Size)) {
+ // Mark the vectorized stores so that we don't vectorize them again.
+ VectorizedStores.insert(Operands.begin(), Operands.end());
+ Changed = true;
+ break;
+ }
+ }
}
return Changed;
@@ -3293,8 +3314,8 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
unsigned count = 0;
StoreRefs.clear();
const DataLayout &DL = BB->getModule()->getDataLayout();
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- StoreInst *SI = dyn_cast<StoreInst>(it);
+ for (Instruction &I : *BB) {
+ StoreInst *SI = dyn_cast<StoreInst>(&I);
if (!SI)
continue;
@@ -3342,13 +3363,15 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
Type *Ty0 = I0->getType();
unsigned Sz = DL.getTypeSizeInBits(Ty0);
+ // FIXME: Register size should be a parameter to this function, so we can
+ // try different vectorization factors.
unsigned VF = MinVecRegSize / Sz;
- for (int i = 0, e = VL.size(); i < e; ++i) {
- Type *Ty = VL[i]->getType();
+ for (Value *V : VL) {
+ Type *Ty = V->getType();
if (!isValidElementType(Ty))
return false;
- Instruction *Inst = dyn_cast<Instruction>(VL[i]);
+ Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst || Inst->getOpcode() != Opcode0)
return false;
}
@@ -3571,6 +3594,8 @@ public:
const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
+ // FIXME: Register size should be a parameter to this function, so we can
+ // try different vectorization factors.
ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;
ReductionPHI = Phi;
@@ -3997,6 +4022,9 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
<< it->second.size() << ".\n");
// Process the stores in chunks of 16.
+ // TODO: The limit of 16 inhibits greater vectorization factors.
+ // For example, AVX2 supports v32i8. Increasing this limit, however,
+ // may cause a significant compile-time increase.
for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
unsigned Len = std::min<unsigned>(CE - CI, 16);
Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len),